def push_corosync_conf(
    self, corosync_conf_facade, skip_offline_nodes=False
):
    corosync_conf_data = corosync_conf_facade.config.export()
    if self.is_corosync_conf_live:
        node_list = corosync_conf_facade.get_nodes()
        if corosync_conf_facade.need_stopped_cluster:
            check_corosync_offline_on_nodes(
                self.node_communicator(),
                self.report_processor,
                node_list,
                skip_offline_nodes
            )
        distribute_corosync_conf(
            self.node_communicator(),
            self.report_processor,
            node_list,
            corosync_conf_data,
            skip_offline_nodes
        )
        if is_service_running(self.cmd_runner(), "corosync"):
            reload_corosync_config(self.cmd_runner())
            self.report_processor.process(
                reports.corosync_config_reloaded()
            )
        if corosync_conf_facade.need_qdevice_reload:
            qdevice_reload_on_nodes(
                self.node_communicator(),
                self.report_processor,
                node_list,
                skip_offline_nodes
            )
    else:
        self._corosync_conf_data = corosync_conf_data
def test_not_systemctl_not_running(self, mock_systemctl):
    mock_systemctl.return_value = False
    self.mock_runner.run.return_value = ("is stopped", "", 3)
    self.assertFalse(lib.is_service_running(self.mock_runner, self.service))
    self.mock_runner.run.assert_called_once_with(
        [_service, self.service, "status"]
    )
def _push_corosync_conf_live(
    self, target_list, corosync_conf_data, need_stopped_cluster,
    need_qdevice_reload, skip_offline_nodes
):
    if need_stopped_cluster:
        com_cmd = CheckCorosyncOffline(
            self.report_processor, skip_offline_nodes
        )
        com_cmd.set_targets(target_list)
        run_and_raise(self.get_node_communicator(), com_cmd)
    com_cmd = DistributeCorosyncConf(
        self.report_processor, corosync_conf_data, skip_offline_nodes
    )
    com_cmd.set_targets(target_list)
    run_and_raise(self.get_node_communicator(), com_cmd)
    if is_service_running(self.cmd_runner(), "corosync"):
        reload_corosync_config(self.cmd_runner())
        self.report_processor.process(
            reports.corosync_config_reloaded()
        )
    if need_qdevice_reload:
        self.report_processor.process(
            reports.qdevice_client_reload_started()
        )
        com_cmd = qdevice.Stop(self.report_processor, skip_offline_nodes)
        com_cmd.set_targets(target_list)
        run(self.get_node_communicator(), com_cmd)
        report_list = com_cmd.error_list
        com_cmd = qdevice.Start(self.report_processor, skip_offline_nodes)
        com_cmd.set_targets(target_list)
        run(self.get_node_communicator(), com_cmd)
        report_list += com_cmd.error_list
        if report_list:
            raise LibraryError()
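# _push_corosync_conf_live above relies on the communication-command pattern
# visible in its body: build a command object, point it at the node targets,
# and hand it to a node communicator. For example:
#
#     com_cmd = DistributeCorosyncConf(
#         self.report_processor, corosync_conf_data, skip_offline_nodes
#     )
#     com_cmd.set_targets(target_list)
#     run_and_raise(self.get_node_communicator(), com_cmd)
#
# run_and_raise() is assumed to raise LibraryError as soon as the command
# produced error reports, while plain run() leaves the errors in
# com_cmd.error_list so the caller can combine them, as the qdevice
# Stop/Start pair above does.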
def test_not_systemctl_not_running(self, mock_systemctl):
    mock_systemctl.return_value = False
    self.mock_runner.run.return_value = ("", 3)
    self.assertFalse(lib.is_service_running(self.mock_runner, self.service))
    self.mock_runner.run.assert_called_once_with(
        ["service", self.service, "status"]
    )
def test_systemctl_not_running(self, mock_systemctl):
    mock_systemctl.return_value = True
    self.mock_runner.run.return_value = ("inactive", "", 2)
    self.assertFalse(lib.is_service_running(self.mock_runner, self.service))
    self.mock_runner.run.assert_called_once_with(
        [_systemctl, "is-active", self.service + ".service"]
    )
def test_not_systemctl_running(self, mock_systemctl):
    mock_systemctl.return_value = False
    self.mock_runner.run.return_value = ("is running", "", 0)
    self.assertTrue(lib.is_service_running(self.mock_runner, self.service))
    self.mock_runner.run.assert_called_once_with(
        [_service, self.service, "status"]
    )
def test_systemctl_not_running(self, mock_systemctl):
    mock_systemctl.return_value = True
    self.mock_runner.run.return_value = ("", 2)
    self.assertFalse(lib.is_service_running(self.mock_runner, self.service))
    self.mock_runner.run.assert_called_once_with(
        ["systemctl", "is-active", self.service + ".service"]
    )
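# The tests above describe the behaviour of lib.is_service_running: on a
# systemd host it runs "systemctl is-active <service>.service", otherwise
# "service <service> status", and treats a zero exit code as "running".
# A minimal sketch of a matching implementation, assuming is_systemctl(),
# the _systemctl/_service executable paths, and the runner's
# (stdout, stderr, retval) return value used by the newer tests; the
# optional systemd instance suffix (used e.g. for booth) is left out here.
def is_service_running(runner, service, instance=None):
    if is_systemctl():
        dummy_stdout, dummy_stderr, retval = runner.run(
            [_systemctl, "is-active", service + ".service"]
        )
    else:
        dummy_stdout, dummy_stderr, retval = runner.run(
            [_service, service, "status"]
        )
    return retval == 0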
def config_destroy(env, ignore_config_load_problems=False):
    env.booth.command_expect_live_env()
    if not env.is_cib_live:
        raise LibraryError(reports.live_environment_required(["CIB"]))

    name = env.booth.name
    config_is_used = partial(booth_reports.booth_config_is_used, name)

    report_list = []

    if resource.find_for_config(
        get_resources(env.get_cib()),
        get_config_file_name(name),
    ):
        report_list.append(config_is_used("in cluster resource"))

    # Only systemd is currently supported. Init.d does not support multiple
    # instances (here specified by name)
    if external.is_systemctl():
        if external.is_service_running(env.cmd_runner(), "booth", name):
            report_list.append(config_is_used("(running in systemd)"))

        if external.is_service_enabled(env.cmd_runner(), "booth", name):
            report_list.append(config_is_used("(enabled in systemd)"))

    if report_list:
        raise LibraryError(*report_list)

    authfile_path = None
    try:
        authfile_path = config_structure.get_authfile(
            parse(env.booth.get_config_content())
        )
    except LibraryError:
        if not ignore_config_load_problems:
            raise LibraryError(booth_reports.booth_cannot_identify_keyfile())

        # the config has to be removed even if its content could not be
        # received or is not valid
        env.report_processor.process(
            booth_reports.booth_cannot_identify_keyfile(
                severity=ReportItemSeverity.WARNING
            )
        )

    if (
        authfile_path
        and
        os.path.dirname(authfile_path) == settings.booth_config_dir
    ):
        env.booth.set_key_path(authfile_path)
        env.booth.remove_key()

    env.booth.remove_config()
def remove_nodes_from_cib(env, node_list):
    """
    Remove specified nodes from CIB. When pcmk is running 'crm_node -R <node>'
    will be used. Otherwise nodes will be removed directly from CIB file.

    env LibraryEnvironment
    node_list iterable -- names of nodes to remove
    """
    # TODO: more advanced error handling
    # TODO: Tests

    if not env.is_cib_live:
        raise LibraryError(reports.live_environment_required(["CIB"]))

    if is_service_running(env.cmd_runner(), "pacemaker"):
        for node in node_list:
            # this may raise a LibraryError
            # NOTE: crm_node cannot remove multiple nodes at once
            remove_node(env.cmd_runner(), node)
        return

    # TODO: We need to remove nodes from the CIB file. We don't want to do it
    # using environment as this is a special case in which we have to edit CIB
    # file directly.
    for node in node_list:
        stdout, stderr, retval = env.cmd_runner().run(
            [
                settings.cibadmin,
                "--delete-all",
                "--force",
                f"--xpath=/cib/configuration/nodes/node[@uname='{node}']",
            ],
            env_extend={"CIB_file": os.path.join(settings.cib_dir, "cib.xml")},
        )
        if retval != 0:
            raise LibraryError(
                reports.node_remove_in_pacemaker_failed(
                    [node], reason=join_multilines([stderr, stdout])
                )
            )
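# For a node named "node1", the fallback branch above effectively runs the
# following command (paths are illustrative; the real values come from
# settings.cibadmin and settings.cib_dir):
#
#     CIB_file=/var/lib/pacemaker/cib/cib.xml \
#         cibadmin --delete-all --force \
#         --xpath="/cib/configuration/nodes/node[@uname='node1']"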
def _get_local_services_status(runner: CommandRunner) -> List[_ServiceStatus]:
    service_def = [
        # (service name, display even if not enabled nor running)
        ("corosync", True),
        ("pacemaker", True),
        ("pacemaker_remote", False),
        ("pcsd", True),
        (get_sbd_service_name(), False),
    ]
    service_status_list = []
    for service, display_always in service_def:
        try:
            service_status_list.append(
                _ServiceStatus(
                    service,
                    display_always,
                    is_service_enabled(runner, service),
                    is_service_running(runner, service),
                )
            )
        except LibraryError:
            pass
    return service_status_list
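# _get_local_services_status fills _ServiceStatus records; only the field
# order is implied by the constructor call above, so the definition below is
# an assumption sketched for clarity.
from typing import NamedTuple

class _ServiceStatus(NamedTuple):
    service: str
    display_always: bool
    enabled: bool
    running: bool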
def config_destroy(
    env: LibraryEnvironment,
    instance_name=None,
    ignore_config_load_problems=False,
):
    # pylint: disable=too-many-branches
    """
    remove booth configuration files

    env
    string instance_name -- booth instance name
    bool ignore_config_load_problems -- delete as much as possible when unable
        to read booth configs for the given booth instance
    """
    report_processor = env.report_processor
    booth_env = env.get_booth_env(instance_name)
    instance_name = booth_env.instance_name
    _ensure_live_env(env, booth_env)

    # TODO use constants in reports
    if resource.find_for_config(
        get_resources(env.get_cib()),
        booth_env.config_path,
    ):
        report_processor.report(
            ReportItem.error(
                reports.messages.BoothConfigIsUsed(
                    instance_name,
                    "in cluster resource",
                )
            )
        )
    # Only systemd is currently supported. Init.d does not support multiple
    # instances (here specified by name)
    if external.is_systemctl():
        if external.is_service_running(
            env.cmd_runner(), "booth", instance_name
        ):
            report_processor.report(
                ReportItem.error(
                    reports.messages.BoothConfigIsUsed(
                        instance_name,
                        "(running in systemd)",
                    )
                )
            )
        if external.is_service_enabled(
            env.cmd_runner(), "booth", instance_name
        ):
            report_processor.report(
                ReportItem.error(
                    reports.messages.BoothConfigIsUsed(
                        instance_name,
                        "(enabled in systemd)",
                    )
                )
            )
    if report_processor.has_errors:
        raise LibraryError()

    try:
        authfile_path = None
        booth_conf = booth_env.config.read_to_facade()
        authfile_path = booth_conf.get_authfile()
    except RawFileError as e:
        report_processor.report(
            raw_file_error_report(
                e,
                force_code=report_codes.FORCE_BOOTH_DESTROY,
                is_forced_or_warning=ignore_config_load_problems,
            )
        )
    except ParserErrorException as e:
        report_processor.report_list(
            booth_env.config.parser_exception_to_report_list(
                e,
                force_code=report_codes.FORCE_BOOTH_DESTROY,
                is_forced_or_warning=ignore_config_load_problems,
            )
        )
    if report_processor.has_errors:
        raise LibraryError()

    if authfile_path:
        authfile_dir, authfile_name = os.path.split(authfile_path)
        if (authfile_dir == settings.booth_config_dir) and authfile_name:
            try:
                key_file = FileInstance.for_booth_key(authfile_name)
                key_file.raw_file.remove(fail_if_file_not_found=False)
            except RawFileError as e:
                report_processor.report(
                    raw_file_error_report(
                        e,
                        force_code=report_codes.FORCE_BOOTH_DESTROY,
                        is_forced_or_warning=ignore_config_load_problems,
                    )
                )
        else:
            report_processor.report(
                ReportItem.warning(
                    reports.messages.BoothUnsupportedFileLocation(
                        authfile_path,
                        settings.booth_config_dir,
                        file_type_codes.BOOTH_KEY,
                    )
                )
            )
    if report_processor.has_errors:
        raise LibraryError()

    try:
        booth_env.config.raw_file.remove()
    except RawFileError as e:
        report_processor.report(raw_file_error_report(e))

    if report_processor.has_errors:
        raise LibraryError()
def full_cluster_status_plaintext(
    env: LibraryEnvironment,
    hide_inactive_resources: bool = False,
    verbose: bool = False,
) -> str:
    """
    Return full cluster status as plaintext

    env -- LibraryEnvironment
    hide_inactive_resources -- if True, do not display non-running resources
    verbose -- if True, display more info
    """
    # pylint: disable=too-many-branches
    # pylint: disable=too-many-locals
    # pylint: disable=too-many-statements

    # validation
    if not env.is_cib_live and env.is_corosync_conf_live:
        raise LibraryError(
            ReportItem.error(
                reports.messages.LiveEnvironmentNotConsistent(
                    [file_type_codes.CIB],
                    [file_type_codes.COROSYNC_CONF],
                )
            )
        )
    if env.is_cib_live and not env.is_corosync_conf_live:
        raise LibraryError(
            ReportItem.error(
                reports.messages.LiveEnvironmentNotConsistent(
                    [file_type_codes.COROSYNC_CONF],
                    [file_type_codes.CIB],
                )
            )
        )

    # initialization
    runner = env.cmd_runner()
    report_processor = env.report_processor
    live = env.is_cib_live and env.is_corosync_conf_live
    is_sbd_running = False

    # load status, cib, corosync.conf
    status_text, warning_list = get_cluster_status_text(
        runner, hide_inactive_resources, verbose
    )
    corosync_conf = None
    # If we are live on a remote node, we have no corosync.conf.
    # TODO Use the new file framework so the path is not exposed.
    if not live or os.path.exists(settings.corosync_conf_file):
        corosync_conf = env.get_corosync_conf()
    cib = env.get_cib()
    if verbose:
        (
            ticket_status_text,
            ticket_status_stderr,
            ticket_status_retval,
        ) = get_ticket_status_text(runner)
    # get extra info if live
    if live:
        try:
            is_sbd_running = is_service_running(runner, get_sbd_service_name())
        except LibraryError:
            pass
        local_services_status = _get_local_services_status(runner)
        if verbose and corosync_conf:
            node_name_list, node_names_report_list = get_existing_nodes_names(
                corosync_conf
            )
            report_processor.report_list(node_names_report_list)
            node_reachability = _get_node_reachability(
                env.get_node_target_factory(),
                env.get_node_communicator(),
                report_processor,
                node_name_list,
            )

    # check stonith configuration
    warning_list = list(warning_list)
    warning_list.extend(_stonith_warnings(cib, is_sbd_running))

    # put it all together
    if report_processor.has_errors:
        raise LibraryError()

    cluster_name = (
        corosync_conf.get_cluster_name()
        if corosync_conf
        else nvpair.get_value(
            "cluster_property_set", get_crm_config(cib), "cluster-name", ""
        )
    )
    parts = []
    parts.append(f"Cluster name: {cluster_name}")
    if warning_list:
        parts.extend(["", "WARNINGS:"] + warning_list + [""])
    parts.append(status_text)
    if verbose:
        parts.extend(["", "Tickets:"])
        if ticket_status_retval != 0:
            ticket_warning_parts = [
                "WARNING: Unable to get information about tickets"
            ]
            if ticket_status_stderr:
                ticket_warning_parts.extend(
                    indent(ticket_status_stderr.splitlines())
                )
            parts.extend(indent(ticket_warning_parts))
        else:
            parts.extend(indent(ticket_status_text.splitlines()))
    if live:
        if verbose and corosync_conf:
            parts.extend(["", "PCSD Status:"])
            parts.extend(
                indent(
                    _format_node_reachability(node_name_list, node_reachability)
                )
            )
        parts.extend(["", "Daemon Status:"])
        parts.extend(
            indent(_format_local_services_status(local_services_status))
        )
    return "\n".join(parts)
def config_restore_local(infile_name, infile_obj):
    """
    Commandline options: no options
    """
    if (
        is_service_running(utils.cmd_runner(), "corosync")
        or is_service_running(utils.cmd_runner(), "pacemaker")
        or is_service_running(utils.cmd_runner(), "pacemaker_remote")
    ):
        utils.err(
            "Cluster is currently running on this node. You need to stop "
            "the cluster in order to restore the configuration."
        )

    file_list = config_backup_path_list(with_uid_gid=True)
    tarball_file_list = []
    version = None
    tmp_dir = None
    try:
        tarball = tarfile.open(infile_name, "r|*", infile_obj)
        while True:
            # next(tarball) does not work in python2.6
            tar_member_info = tarball.next()
            if tar_member_info is None:
                break
            if tar_member_info.name == "version.txt":
                version_data = tarball.extractfile(tar_member_info)
                version = version_data.read()
                version_data.close()
                continue
            tarball_file_list.append(tar_member_info.name)
        tarball.close()

        required_file_list = [
            tar_path
            for tar_path, path_info in file_list.items()
            if path_info["required"]
        ]
        missing = set(required_file_list) - set(tarball_file_list)
        if missing:
            utils.err(
                "unable to restore the cluster, missing files in backup: %s"
                % ", ".join(missing)
            )

        config_backup_check_version(version)

        if infile_obj:
            infile_obj.seek(0)
        tarball = tarfile.open(infile_name, "r|*", infile_obj)
        while True:
            # next(tarball) does not work in python2.6
            tar_member_info = tarball.next()
            if tar_member_info is None:
                break
            extract_info = None
            path = tar_member_info.name
            while path:
                if path in file_list:
                    extract_info = file_list[path]
                    break
                path = os.path.dirname(path)
            if not extract_info:
                continue
            path_full = None
            if hasattr(extract_info.get("pre_store_call"), "__call__"):
                extract_info["pre_store_call"]()
            if "rename" in extract_info and extract_info["rename"]:
                if tmp_dir is None:
                    tmp_dir = tempfile.mkdtemp()
                tarball.extractall(tmp_dir, [tar_member_info])
                path_full = extract_info["path"]
                shutil.move(
                    os.path.join(tmp_dir, tar_member_info.name), path_full
                )
            else:
                dir_path = os.path.dirname(extract_info["path"])
                tarball.extractall(dir_path, [tar_member_info])
                path_full = os.path.join(dir_path, tar_member_info.name)
            file_attrs = extract_info["attrs"]
            os.chmod(path_full, file_attrs["mode"])
            os.chown(path_full, file_attrs["uid"], file_attrs["gid"])
        tarball.close()
    except (tarfile.TarError, EnvironmentError, OSError) as e:
        utils.err("unable to restore the cluster: %s" % e)
    finally:
        if tmp_dir:
            shutil.rmtree(tmp_dir, ignore_errors=True)

    try:
        sig_path = os.path.join(settings.cib_dir, "cib.xml.sig")
        if os.path.exists(sig_path):
            os.remove(sig_path)
    except EnvironmentError as e:
        utils.err("unable to remove %s: %s" % (sig_path, e))
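# config_restore_local drives extraction from the mapping returned by
# config_backup_path_list(with_uid_gid=True). The keys it relies on
# ("path", "required", "attrs", optional "rename" and "pre_store_call")
# are visible above; a hypothetical entry, with illustrative values only,
# might look like:
#
#     file_list = {
#         "corosync.conf": {
#             "path": "/etc/corosync/corosync.conf",
#             "required": True,
#             "attrs": {"mode": 0o644, "uid": 0, "gid": 0},
#             # optional: "rename": True, "pre_store_call": some_callable,
#         },
#     }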
def full_cluster_status_plaintext(
    env: LibraryEnvironment,
    hide_inactive_resources: bool = False,
    verbose: bool = False,
) -> str:
    """
    Return full cluster status as plaintext

    env -- LibraryEnvironment
    hide_inactive_resources -- if True, do not display non-running resources
    verbose -- if True, display more info
    """
    # pylint: disable=too-many-branches
    # pylint: disable=too-many-locals

    # validation
    if not env.is_cib_live and env.is_corosync_conf_live:
        raise LibraryError(
            reports.live_environment_not_consistent(
                [file_type_codes.CIB],
                [file_type_codes.COROSYNC_CONF],
            )
        )
    if env.is_cib_live and not env.is_corosync_conf_live:
        raise LibraryError(
            reports.live_environment_not_consistent(
                [file_type_codes.COROSYNC_CONF],
                [file_type_codes.CIB],
            )
        )

    # initialization
    runner = env.cmd_runner()
    report_processor = SimpleReportProcessor(env.report_processor)
    live = env.is_cib_live and env.is_corosync_conf_live
    is_sbd_running = False

    # load status, cib, corosync.conf
    status_text, warning_list = get_cluster_status_text(
        runner, hide_inactive_resources, verbose
    )
    corosync_conf = env.get_corosync_conf()
    cib = env.get_cib()
    if verbose:
        ticket_status_text, ticket_status_stderr, ticket_status_retval = (
            get_ticket_status_text(runner)
        )
    # get extra info if live
    if live:
        try:
            is_sbd_running = is_service_running(runner, get_sbd_service_name())
        except LibraryError:
            pass
        local_services_status = _get_local_services_status(runner)
        if verbose:
            node_name_list, node_names_report_list = get_existing_nodes_names(
                corosync_conf
            )
            report_processor.report_list(node_names_report_list)
            node_reachability = _get_node_reachability(
                env.get_node_target_factory(),
                env.get_node_communicator(),
                report_processor,
                node_name_list,
            )

    # check stonith configuration
    warning_list = list(warning_list)
    warning_list.extend(_stonith_warnings(cib, is_sbd_running))

    # put it all together
    if report_processor.has_errors:
        raise LibraryError()

    parts = []
    parts.append(f"Cluster name: {corosync_conf.get_cluster_name()}")
    if warning_list:
        parts.extend(["", "WARNINGS:"] + warning_list + [""])
    parts.append(status_text)
    if verbose:
        parts.extend(["", "Tickets:"])
        if ticket_status_retval != 0:
            ticket_warning_parts = [
                "WARNING: Unable to get information about tickets"
            ]
            if ticket_status_stderr:
                ticket_warning_parts.extend(
                    indent(ticket_status_stderr.splitlines())
                )
            parts.extend(indent(ticket_warning_parts))
        else:
            parts.extend(indent(ticket_status_text.splitlines()))
    if live:
        if verbose:
            parts.extend(["", "PCSD Status:"])
            parts.extend(
                indent(
                    _format_node_reachability(node_name_list, node_reachability)
                )
            )
        parts.extend(["", "Daemon Status:"])
        parts.extend(
            indent(_format_local_services_status(local_services_status))
        )
    return "\n".join(parts)