def test_cmd_runner_no_options(self, mock_runner):
    """cmd_runner with no options passes an empty env dict to the factory."""
    runner_stub = mock.MagicMock()
    mock_runner.return_value = runner_stub
    environment = LibraryEnvironment(self.mock_logger, self.mock_reporter)
    self.assertEqual(runner_stub, environment.cmd_runner())
    mock_runner.assert_called_once_with(
        self.mock_logger, self.mock_reporter, {}
    )
class PushCib(TestCase):
    """
    Tests for LibraryEnvironment.push_cib.

    The extra wait_for_idle / push_cib_xml test-method arguments are
    presumably injected by mock.patch decorators outside this excerpt --
    TODO confirm in the full file.
    """

    def setUp(self):
        # Environment with a mocked logger and report processor.
        self.env = LibraryEnvironment(
            mock.MagicMock(logging.Logger), MockLibraryReportProcessor()
        )

    def test_run_only_push_when_without_wait(
        self, wait_for_idle, push_cib_xml
    ):
        # Without a wait timeout only the CIB push happens.
        self.env.push_cib(etree.fromstring("<cib/>"))
        push_cib_xml.assert_called_once_with("<cib/>")
        wait_for_idle.assert_not_called()

    def test_run_wait_when_wait_specified(self, wait_for_idle, push_cib_xml):
        # With a timeout the push is followed by waiting for pacemaker idle.
        self.env.push_cib(etree.fromstring("<cib/>"), 10)
        push_cib_xml.assert_called_once_with("<cib/>")
        wait_for_idle.assert_called_once_with(self.env.cmd_runner(), 10)
def test_cmd_runner_all_options(self, mock_runner):
    """cmd_runner forwards user_login as CIB_user in the runner env."""
    runner_stub = mock.MagicMock()
    mock_runner.return_value = runner_stub
    user = "******"
    environment = LibraryEnvironment(
        self.mock_logger, self.mock_reporter, user_login=user
    )
    self.assertEqual(runner_stub, environment.cmd_runner())
    mock_runner.assert_called_once_with(
        self.mock_logger, self.mock_reporter, {"CIB_user": user}
    )
def unfence_node(env: LibraryEnvironment, node: str, devices: Iterable[str]):
    """
    Unfence scsi devices on a node by calling the fence_scsi agent script.

    env -- provides communication with externals
    node -- node name on which unfencing is performed
    devices -- scsi devices to be unfenced
    """
    fence_scsi_bin = os.path.join(settings.fence_agent_binaries, "fence_scsi")
    stdout, stderr, return_code = env.cmd_runner().run(
        [
            fence_scsi_bin,
            "--action=on",
            "--devices",
            ",".join(sorted(devices)),
            f"--plug={node}",
        ],
    )
    if return_code != 0:
        raise LibraryError(
            reports.ReportItem.error(
                reports.messages.StonithUnfencingFailed(
                    join_multilines([stderr, stdout])
                )
            )
        )
def qdevice_status_text(
    lib_env: LibraryEnvironment,
    model,
    verbose=False,
    cluster=None,
):
    """
    Get runtime status of a quorum device in a plain text string

    model -- qdevice model to query
    bool verbose -- get more detailed output
    string cluster -- show information only about the specified cluster
    """
    _check_model(model)
    runner = lib_env.cmd_runner()
    try:
        return (
            qdevice_net.qdevice_status_generic_text(runner, verbose)
            + qdevice_net.qdevice_status_cluster_text(runner, cluster, verbose)
        )
    except qdevice_net.QnetdNotRunningException as e:
        # Chain the original exception (PEP 3134) so tracebacks keep the
        # root cause; sibling functions in this module already do this.
        raise LibraryError(reports.qdevice_not_running(model)) from e
def qdevice_destroy(lib_env: LibraryEnvironment, model, proceed_if_used=False):
    """
    Stop and disable qdevice on the local host and remove its configuration

    string model -- qdevice model to destroy
    bool proceed_if_used -- destroy qdevice even if it is used by clusters
    """
    _check_model(model)
    reporter = lib_env.report_processor
    services = lib_env.service_manager
    _check_qdevice_not_used(
        reporter, lib_env.cmd_runner(), model, proceed_if_used
    )
    _service_stop(reporter, services, qdevice_net.SERVICE_NAME)
    _service_disable(reporter, services, qdevice_net.SERVICE_NAME)
    qdevice_net.qdevice_destroy()
    reporter.report(
        ReportItem.info(reports.messages.QdeviceDestroySuccess(model))
    )
def history_cleanup(env: LibraryEnvironment, node: Optional[str] = None):
    """
    Clear fencing history

    env --
    node -- clear history for the specified node or all nodes if None
    """
    runner = env.cmd_runner()
    if not is_fence_history_supported_management(runner):
        raise LibraryError(
            ReportItem.error(reports.messages.FenceHistoryNotSupported())
        )
    try:
        return fence_history_cleanup(runner, node)
    except FenceHistoryCommandErrorException as err:
        failure = reports.messages.FenceHistoryCommandError(
            str(err), reports.const.FENCE_HISTORY_COMMAND_CLEANUP
        )
        raise LibraryError(ReportItem.error(failure)) from err
def _service_disable(lib_env: LibraryEnvironment, func):
    """Disable a quorum-device service via func(runner) and report the outcome."""
    try:
        func(lib_env.cmd_runner())
    except external.DisableServiceError as err:
        failure = reports.messages.ServiceActionFailed(
            reports.const.SERVICE_ACTION_DISABLE,
            err.service,
            err.message,
        )
        raise LibraryError(ReportItem.error(failure)) from err
    lib_env.report_processor.report(
        ReportItem.info(
            reports.messages.ServiceActionSucceeded(
                reports.const.SERVICE_ACTION_DISABLE, "quorum device"
            )
        )
    )
def client_net_import_certificate(lib_env: LibraryEnvironment, certificate):
    """
    Import qnetd client certificate to the local node certificate storage

    certificate -- base64 encoded qnetd client certificate
    """
    try:
        decoded_certificate = base64.b64decode(certificate)
    except (TypeError, binascii.Error) as err:
        bad_value = reports.messages.InvalidOptionValue(
            "qnetd client certificate",
            certificate,
            ["base64 encoded certificate"],
        )
        raise LibraryError(ReportItem.error(bad_value)) from err
    qdevice_net.client_import_certificate_and_key(
        lib_env.cmd_runner(), decoded_certificate
    )
def update_scsi_devices(
    env: LibraryEnvironment,
    stonith_id: str,
    set_device_list: Iterable[str],
    force_flags: Container[reports.types.ForceCode] = (),
) -> None:
    """
    Update scsi fencing devices without restart and affecting other resources.

    env -- provides all for communication with externals
    stonith_id -- id of stonith resource
    set_device_list -- paths to the scsi devices that would be set for
        stonith resource
    force_flags -- list of flags codes
    """
    # An empty device list is reported as an error here; the raise is
    # deferred so it is batched with any other validation errors below.
    if not set_device_list:
        env.report_processor.report(
            ReportItem.error(
                reports.messages.InvalidOptionValue(
                    "devices", "", None, cannot_be_empty=True
                )
            )
        )
    runner = env.cmd_runner()
    (
        stonith_el,
        current_device_list,
    ) = _update_scsi_devices_get_element_and_devices(
        runner, env.report_processor, env.get_cib(), stonith_id
    )
    if env.report_processor.has_errors:
        raise LibraryError()
    resource.stonith.update_scsi_devices_without_restart(
        runner,
        env.get_cluster_state(),
        stonith_el,
        IdProvider(stonith_el),
        set_device_list,
    )
    # Devices added by this update must be unfenced to be usable immediately.
    _unfencing_scsi_devices(
        env, stonith_el, current_device_list, set_device_list, force_flags
    )
    env.push_cib()
def _ticket_operation(
    operation, env: LibraryEnvironment, ticket_name, site_ip, instance_name
):
    """
    Run a booth binary ticket operation against a site IP.

    If site_ip is not given, it is derived from the single IP resource bound
    to this booth instance's config in the CIB.
    """
    booth_env = env.get_booth_env(instance_name)
    _ensure_live_env(env, booth_env)
    if not site_ip:
        found_ips = resource.find_bound_ip(
            get_resources(env.get_cib()), booth_env.config_path
        )
        if len(found_ips) != 1:
            raise LibraryError(
                booth_reports.booth_cannot_determine_local_site_ip()
            )
        site_ip = found_ips[0]
    command = [settings.booth_binary, operation, "-s", site_ip, ticket_name]
    stdout, stderr, return_code = env.cmd_runner().run(command)
    if return_code != 0:
        raise LibraryError(
            booth_reports.booth_ticket_operation_failed(
                operation,
                join_multilines([stderr, stdout]),
                site_ip,
                ticket_name,
            )
        )
def describe_agent(
    lib_env: LibraryEnvironment, agent_name: str
) -> Dict[str, Any]:
    """
    Get agent's description (metadata) in a structure

    agent_name -- name of the agent
    """
    runner = lib_env.cmd_runner()
    reporter = lib_env.report_processor
    factory = ResourceAgentFacadeFactory(runner, reporter)
    try:
        # A name containing ":" is a full "standard:provider:type" spec;
        # otherwise search for a unique agent of that type.
        if ":" in agent_name:
            found_name = split_resource_agent_name(agent_name)
        else:
            found_name = find_one_resource_agent_by_type(
                runner, reporter, agent_name
            )
        facade = factory.facade_from_parsed_name(found_name)
        return _agent_metadata_to_dict(facade.metadata, describe=True)
    except ResourceAgentError as e:
        lib_env.report_processor.report(resource_agent_error_to_report_item(e))
        raise LibraryError() from e
def stop_booth(env: LibraryEnvironment, instance_name=None):
    """
    Stop specified instance of booth service, systemd systems supported only.

    env
    string instance_name -- booth instance name
    """
    external.ensure_is_systemd()
    booth_env = env.get_booth_env(instance_name)
    _ensure_live_env(env, booth_env)
    instance_name = booth_env.instance_name
    try:
        external.stop_service(env.cmd_runner(), "booth", instance_name)
    except external.StopServiceError as e:
        # Chain the original error (PEP 3134) so tracebacks keep the root
        # cause; without "from e" the context is implicit and noisier.
        raise LibraryError(
            reports.service_stop_error(
                "booth", e.message, instance=instance_name
            )
        ) from e
    env.report_processor.report(
        reports.service_stop_success("booth", instance=instance_name)
    )
def qdevice_net_sign_certificate_request(
    lib_env: LibraryEnvironment,
    certificate_request,
    cluster_name,
):
    """
    Sign node certificate request by qnetd CA

    string certificate_request -- base64 encoded certificate request
    string cluster_name -- name of the cluster to which qdevice is being added
    """
    try:
        certificate_request_data = base64.b64decode(certificate_request)
    except (TypeError, binascii.Error) as e:
        # Chain the decoding error (PEP 3134); the sibling import function
        # for client certificates already chains the same way.
        raise LibraryError(
            reports.invalid_option_value(
                "qnetd certificate request",
                certificate_request,
                ["base64 encoded certificate"],
            )
        ) from e
    return base64.b64encode(
        qdevice_net.qdevice_sign_certificate_request(
            lib_env.cmd_runner(), certificate_request_data, cluster_name
        )
    )
def _service_kill(lib_env: LibraryEnvironment, service: str) -> None:
    """Kill the given service, raising LibraryError with one report per failure."""
    try:
        external.kill_services(lib_env.cmd_runner(), [service])
    except external.KillServicesError as err:
        # err.service lists every service whose kill failed; emit one error
        # report for each of them.
        raise LibraryError(
            *[
                ReportItem.error(
                    reports.messages.ServiceActionFailed(
                        reports.const.SERVICE_ACTION_KILL,
                        failed_service,
                        err.message,
                    )
                )
                for failed_service in err.service
            ]
        ) from err
    lib_env.report_processor.report(
        ReportItem.info(
            reports.messages.ServiceActionSucceeded(
                reports.const.SERVICE_ACTION_KILL, "quorum device"
            )
        )
    )
def test_dump_cib_file(self, mock_tmpfile, mock_runner):
    """cib_data is dumped to a tmp file whose name is passed as CIB_file."""
    tmp_file_name = "a file"
    runner_stub = mock.MagicMock()
    mock_runner.return_value = runner_stub
    tmpfile_stub = mock.MagicMock()
    tmpfile_stub.name = tmp_file_name
    mock_tmpfile.return_value = tmpfile_stub
    env = LibraryEnvironment(
        self.mock_logger, self.mock_reporter, cib_data="<cib />"
    )
    self.assertEqual(runner_stub, env.cmd_runner())
    mock_runner.assert_called_once_with(
        self.mock_logger,
        self.mock_reporter,
        {"LC_ALL": "C", "CIB_file": tmp_file_name},
    )
    mock_tmpfile.assert_called_once_with(self.mock_reporter, "<cib />")
def list_agents(
    lib_env: LibraryEnvironment,
    describe: bool = True,
    search: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """
    List all stonith agents on the local host, optionally filtered and described

    describe -- load and return agents' description as well
    search -- return only agents which name contains this string
    """
    runner = lib_env.cmd_runner()
    agent_names = sorted(
        _get_agent_names(runner, StandardProviderTuple("stonith")),
        key=lambda item: item.full_name,
    )
    return _complete_agent_list(
        runner,
        lib_env.report_processor,
        agent_names,
        describe,
        search,
    )
def describe_agent(
    lib_env: LibraryEnvironment, agent_name: str
) -> Dict[str, Any]:
    """
    Get agent's description (metadata) in a structure

    agent_name -- name of the agent (not containing "stonith:" prefix)
    """
    runner = lib_env.cmd_runner()
    factory = ResourceAgentFacadeFactory(runner, lib_env.report_processor)
    try:
        # A colon means the caller passed a full "standard:type" name,
        # which is not a valid bare stonith agent name.
        if ":" in agent_name:
            raise InvalidResourceAgentName(agent_name)
        parsed_name = ResourceAgentName("stonith", None, agent_name)
        facade = factory.facade_from_parsed_name(parsed_name)
        return _agent_metadata_to_dict(facade.metadata, describe=True)
    except ResourceAgentError as e:
        lib_env.report_processor.report(
            resource_agent_error_to_report_item(e, is_stonith=True)
        )
        raise LibraryError() from e
def _defaults_config(
    env: LibraryEnvironment,
    cib_section_name: str,
    evaluate_expired: bool,
) -> List[CibNvsetDto]:
    # Export nvsets from the given CIB section as DTOs, optionally
    # evaluating whether each nvset's rule is currently in effect.
    runner = env.cmd_runner()
    cib = env.get_cib()
    if evaluate_expired:
        if has_rule_in_effect_status_tool():
            # A rule-status tool is available: evaluate rules one by one.
            in_effect_eval: RuleInEffectEval = RuleInEffectEvalOneByOne(
                cib, runner
            )
        else:
            # No status tool available: fall back to a dummy evaluator and
            # warn that expiration cannot be detected.
            in_effect_eval = RuleInEffectEvalDummy()
            env.report_processor.report(
                ReportItem.warning(
                    reports.messages.RuleInEffectStatusDetectionNotSupported()
                )
            )
    else:
        in_effect_eval = RuleInEffectEvalDummy()
    return [
        nvpair_multi.nvset_element_to_dto(nvset_el, in_effect_eval)
        for nvset_el in nvpair_multi.find_nvsets(
            sections.get(cib, cib_section_name)
        )
    ]
def qdevice_setup(lib_env: LibraryEnvironment, model, enable, start):
    """
    Initialize qdevice on local host with specified model

    string model -- qdevice model to initialize
    bool enable -- make qdevice service start on boot
    bool start -- start qdevice now
    """
    _check_model(model)
    qdevice_net.qdevice_setup(lib_env.cmd_runner())
    reporter = lib_env.report_processor
    services = lib_env.service_manager
    reporter.report(
        ReportItem.info(reports.messages.QdeviceInitializationSuccess(model))
    )
    if enable:
        _service_enable(reporter, services, qdevice_net.SERVICE_NAME)
    if start:
        _service_start(reporter, services, qdevice_net.SERVICE_NAME)
def history_update(env: LibraryEnvironment):
    """
    Update fencing history in a cluster (sync with other nodes)

    env --
    """
    runner = env.cmd_runner()
    if not is_fence_history_supported_management(runner):
        raise LibraryError(
            ReportItem.error(reports.messages.FenceHistoryNotSupported())
        )
    try:
        return fence_history_update(runner)
    except FenceHistoryCommandErrorException as err:
        failure = reports.messages.FenceHistoryCommandError(
            str(err), reports.const.FENCE_HISTORY_COMMAND_UPDATE
        )
        raise LibraryError(ReportItem.error(failure)) from err
def operation_defaults_config(
    env: LibraryEnvironment, evaluate_expired: bool
) -> CibDefaultsDto:
    """
    List all operation defaults nvsets

    env --
    evaluate_expired -- also evaluate whether rules are expired or in effect
    """
    cib = env.get_cib()
    rule_evaluator = _get_rule_evaluator(
        cib, env.cmd_runner(), env.report_processor, evaluate_expired
    )

    # PEP 8 (E731): a named inner function instead of assigning a lambda
    # to a name; behavior is identical.
    def get_config(tag):
        return _defaults_config(
            cib,
            tag,
            sections.OP_DEFAULTS,
            rule_evaluator,
        )

    return CibDefaultsDto(
        instance_attributes=get_config(nvpair_multi.NVSET_INSTANCE),
        meta_attributes=get_config(nvpair_multi.NVSET_META),
    )
def test_dump_cib_file(self, mock_tmpfile, mock_runner):
    """cib_data goes to a tmp file whose path is passed as CIB_file."""
    runner_stub = mock.MagicMock()
    mock_runner.return_value = runner_stub
    tmpfile_stub = mock.MagicMock()
    tmpfile_stub.name = rc("file.tmp")
    mock_tmpfile.return_value = tmpfile_stub
    env = LibraryEnvironment(
        self.mock_logger, self.mock_reporter, cib_data="<cib />"
    )
    self.assertEqual(runner_stub, env.cmd_runner())
    mock_runner.assert_called_once_with(
        self.mock_logger,
        self.mock_reporter,
        {"LC_ALL": "C", "CIB_file": rc("file.tmp")},
    )
    mock_tmpfile.assert_called_once_with("<cib />")
def history_get_text(env: LibraryEnvironment, node: Optional[str] = None):
    """
    Get full fencing history in plain text

    env --
    node -- get history for the specified node or all nodes if None
    """
    runner = env.cmd_runner()
    if not is_fence_history_supported_management(runner):
        raise LibraryError(
            ReportItem.error(reports.messages.FenceHistoryNotSupported())
        )
    try:
        return fence_history_text(runner, node)
    except FenceHistoryCommandErrorException as err:
        failure = reports.messages.FenceHistoryCommandError(
            str(err), reports.const.FENCE_HISTORY_COMMAND_SHOW
        )
        raise LibraryError(ReportItem.error(failure)) from err
def start_booth(env: LibraryEnvironment, instance_name=None):
    """
    Start specified instance of booth service, systemd systems supported only.
    On non-systemd systems it can be run like this:
    BOOTH_CONF_FILE=<booth-file-path> /etc/initd/booth-arbitrator

    env
    string instance_name -- booth instance name
    """
    external.ensure_is_systemd()
    booth_env = env.get_booth_env(instance_name)
    _ensure_live_env(env, booth_env)
    instance_name = booth_env.instance_name
    try:
        external.start_service(env.cmd_runner(), "booth", instance_name)
    except external.StartServiceError as e:
        # Chain the original error (PEP 3134) so tracebacks keep the root
        # cause; without "from e" the context is implicit and noisier.
        raise LibraryError(
            reports.service_start_error(
                "booth", e.message, instance=instance_name
            )
        ) from e
    env.report_processor.report(
        reports.service_start_success("booth", instance=instance_name)
    )
def start_booth(env: LibraryEnvironment, instance_name=None):
    """
    Start specified instance of booth service, systemd systems supported only.
    On non-systemd systems it can be run like this:
    BOOTH_CONF_FILE=<booth-file-path> /etc/initd/booth-arbitrator

    env
    string instance_name -- booth instance name
    """
    external.ensure_is_systemd()
    booth_env = env.get_booth_env(instance_name)
    _ensure_live_env(env, booth_env)
    instance_name = booth_env.instance_name
    try:
        external.start_service(env.cmd_runner(), "booth", instance_name)
    except external.StartServiceError as err:
        failure = reports.messages.ServiceActionFailed(
            reports.const.SERVICE_ACTION_START,
            "booth",
            err.message,
            instance=instance_name,
        )
        raise LibraryError(ReportItem.error(failure)) from err
    env.report_processor.report(
        ReportItem.info(
            reports.messages.ServiceActionSucceeded(
                reports.const.SERVICE_ACTION_START,
                "booth",
                instance=instance_name,
            )
        )
    )
def create(
    env: LibraryEnvironment,
    stonith_id: str,
    stonith_agent_name: str,
    operations: Iterable[Mapping[str, str]],
    meta_attributes: Mapping[str, str],
    instance_attributes: Mapping[str, str],
    allow_absent_agent: bool = False,
    allow_invalid_operation: bool = False,
    allow_invalid_instance_attributes: bool = False,
    use_default_operations: bool = True,
    ensure_disabled: bool = False,
    wait: WaitType = False,
):
    # pylint: disable=too-many-arguments, too-many-locals
    """
    Create stonith as resource in a cib.

    env -- provides all for communication with externals
    stonith_id -- an identifier of stonith resource
    stonith_agent_name -- contains name for the identification of agent
    operations -- contains attributes for each entered operation
    meta_attributes -- contains attributes for primitive/meta_attributes
    instance_attributes -- contains attributes for
        primitive/instance_attributes
    allow_absent_agent -- a flag for allowing agent not installed in a system
    allow_invalid_operation -- a flag for allowing to use operations that are
        not listed in a stonith agent metadata
    allow_invalid_instance_attributes -- a flag for allowing to use instance
        attributes that are not listed in a stonith agent metadata or for
        allowing to not use the instance_attributes that are required in
        stonith agent metadata
    use_default_operations -- a flag for stopping of adding default cib
        operations (specified in a stonith agent)
    ensure_disabled -- flag that keeps resource in target-role "Stopped"
    wait -- flag for controlling waiting for pacemaker idle mechanism
    """
    runner = env.cmd_runner()
    agent_factory = ResourceAgentFacadeFactory(runner, env.report_processor)
    stonith_agent = _get_agent_facade(
        env.report_processor,
        agent_factory,
        stonith_agent_name,
        allow_absent_agent,
    )
    # Agents that provide unfencing must advertise it via the "provides"
    # meta attribute of the created resource.
    if stonith_agent.metadata.provides_unfencing:
        meta_attributes = dict(meta_attributes, provides="unfencing")
    with resource_environment(
        env,
        wait,
        [stonith_id],
        _ensure_disabled_after_wait(
            ensure_disabled
            or resource.common.are_meta_disabled(meta_attributes),
        ),
    ) as resources_section:
        id_provider = IdProvider(resources_section)
        stonith_element = resource.primitive.create(
            env.report_processor,
            resources_section,
            id_provider,
            stonith_id,
            stonith_agent,
            raw_operation_list=operations,
            meta_attributes=meta_attributes,
            instance_attributes=instance_attributes,
            allow_invalid_operation=allow_invalid_operation,
            allow_invalid_instance_attributes=allow_invalid_instance_attributes,
            use_default_operations=use_default_operations,
            resource_type="stonith",
        )
        if ensure_disabled:
            resource.common.disable(stonith_element, id_provider)
class TestDescribeAgent(TestCase):
    """
    Tests for lib.describe_agent.

    The mock_guess / mock_metadata test-method arguments are presumably
    injected by mock.patch decorators outside this excerpt -- TODO confirm.
    """

    def setUp(self):
        self.mock_logger = mock.MagicMock(logging.Logger)
        self.mock_reporter = MockLibraryReportProcessor()
        self.lib_env = LibraryEnvironment(self.mock_logger, self.mock_reporter)
        # Minimal valid agent metadata XML returned by the mocked loader.
        self.metadata = """
            <resource-agent>
                <shortdesc>short desc</shortdesc>
                <longdesc>long desc</longdesc>
                <parameters>
                </parameters>
                <actions>
                </actions>
            </resource-agent>
        """
        # Expected dict produced by describe_agent for self.metadata.
        self.description = {
            "name": "ocf:test:Dummy",
            "shortdesc": "short desc",
            "longdesc": "long desc",
            "parameters": [],
            "actions": [],
        }

    def test_full_name_success(self, mock_guess, mock_metadata):
        # Full "standard:provider:type" name: no guessing is needed.
        mock_metadata.return_value = self.metadata
        self.assertEqual(
            lib.describe_agent(self.lib_env, "ocf:test:Dummy"),
            self.description
        )
        self.assertEqual(len(mock_metadata.mock_calls), 1)
        mock_guess.assert_not_called()

    def test_guess_success(self, mock_guess, mock_metadata):
        # Bare type name: the agent is resolved via the guess helper.
        mock_metadata.return_value = self.metadata
        mock_guess.return_value = lib_ra.ResourceAgent(
            self.lib_env.cmd_runner(), "ocf:test:Dummy"
        )
        self.assertEqual(
            lib.describe_agent(self.lib_env, "dummy"), self.description
        )
        self.assertEqual(len(mock_metadata.mock_calls), 1)
        mock_guess.assert_called_once_with("mock_runner", "dummy")

    def test_full_name_fail(self, mock_guess, mock_metadata):
        # Unparsable metadata XML surfaces as UNABLE_TO_GET_AGENT_METADATA.
        mock_metadata.return_value = "invalid xml"
        assert_raise_library_error(
            lambda: lib.describe_agent(self.lib_env, "ocf:test:Dummy"),
            (
                severity.ERROR,
                report_codes.UNABLE_TO_GET_AGENT_METADATA,
                {
                    "agent": "ocf:test:Dummy",
                    "reason": "Start tag expected, '<' not found, line 1, column 1",
                }
            )
        )
        self.assertEqual(len(mock_metadata.mock_calls), 1)
        mock_guess.assert_not_called()
def create_in_group(
    env: LibraryEnvironment,
    stonith_id: str,
    stonith_agent_name: str,
    group_id: str,
    operations: Iterable[Mapping[str, str]],
    meta_attributes: Mapping[str, str],
    instance_attributes: Mapping[str, str],
    allow_absent_agent: bool = False,
    allow_invalid_operation: bool = False,
    allow_invalid_instance_attributes: bool = False,
    use_default_operations: bool = True,
    ensure_disabled: bool = False,
    adjacent_resource_id: Optional[str] = None,
    put_after_adjacent: bool = False,
    wait: WaitType = False,
):
    # pylint: disable=too-many-arguments, too-many-locals
    """
    DEPRECATED
    Create stonith as resource in a cib and put it into defined group.

    env -- provides all for communication with externals
    stonith_id -- an identifier of stonith resource
    stonith_agent_name -- contains name for the identification of agent
    group_id -- identifier of the group to put stonith inside
    operations -- contains attributes for each entered operation
    meta_attributes -- contains attributes for primitive/meta_attributes
    instance_attributes -- contains attributes for
        primitive/instance_attributes
    allow_absent_agent -- a flag for allowing agent not installed in a system
    allow_invalid_operation -- a flag for allowing to use operations that are
        not listed in a stonith agent metadata
    allow_invalid_instance_attributes -- a flag for allowing to use instance
        attributes that are not listed in a stonith agent metadata or for
        allowing to not use the instance_attributes that are required in
        stonith agent metadata
    use_default_operations -- a flag for stopping of adding default cib
        operations (specified in a stonith agent)
    ensure_disabled -- flag that keeps resource in target-role "Stopped"
    adjacent_resource_id -- identify neighbor of a newly created stonith
    put_after_adjacent -- is flag to put a newly created resource before/after
        adjacent stonith
    wait -- flag for controlling waiting for pacemaker idle mechanism
    """
    runner = env.cmd_runner()
    agent_factory = ResourceAgentFacadeFactory(runner, env.report_processor)
    stonith_agent = _get_agent_facade(
        env.report_processor,
        agent_factory,
        stonith_agent_name,
        allow_absent_agent,
    )
    # Agents that provide unfencing must advertise it via the "provides"
    # meta attribute of the created resource.
    if stonith_agent.metadata.provides_unfencing:
        meta_attributes = dict(meta_attributes, provides="unfencing")
    with resource_environment(
        env,
        wait,
        [stonith_id],
        _ensure_disabled_after_wait(
            ensure_disabled
            or resource.common.are_meta_disabled(meta_attributes),
        ),
    ) as resources_section:
        id_provider = IdProvider(resources_section)
        adjacent_resource_element = None
        if adjacent_resource_id:
            try:
                adjacent_resource_element = get_element_by_id(
                    get_root(resources_section), adjacent_resource_id
                )
            except ElementNotFound:
                # We cannot continue without adjacent element because
                # the validator might produce misleading reports
                if env.report_processor.report(
                    ReportItem.error(
                        reports.messages.IdNotFound(adjacent_resource_id, [])
                    )
                ).has_errors:
                    raise LibraryError() from None
        try:
            group_element = get_element_by_id(
                get_root(resources_section), group_id
            )
        except ElementNotFound:
            # The group does not exist yet: validate its id and create it.
            group_id_reports: List[ReportItem] = []
            validate_id(
                group_id, description="group name", reporter=group_id_reports
            )
            env.report_processor.report_list(group_id_reports)
            group_element = resource.group.append_new(
                resources_section, group_id
            )
        stonith_element = resource.primitive.create(
            env.report_processor,
            resources_section,
            id_provider,
            stonith_id,
            stonith_agent,
            operations,
            meta_attributes,
            instance_attributes,
            allow_invalid_operation,
            allow_invalid_instance_attributes,
            use_default_operations,
        )
        if ensure_disabled:
            resource.common.disable(stonith_element, id_provider)
        if env.report_processor.report_list(
            resource.validations.validate_move_resources_to_group(
                group_element,
                [stonith_element],
                adjacent_resource_element,
            )
        ).has_errors:
            raise LibraryError()
        resource.hierarchy.move_resources_to_group(
            group_element,
            [stonith_element],
            adjacent_resource_element,
            put_after_adjacent,
        )
def config_destroy(
    env: LibraryEnvironment,
    instance_name=None,
    ignore_config_load_problems=False,
):
    # pylint: disable=too-many-branches
    """
    remove booth configuration files

    env
    string instance_name -- booth instance name
    bool ignore_config_load_problems -- delete as much as possible when unable
        to read booth configs for the given booth instance
    """
    report_processor = env.report_processor
    booth_env = env.get_booth_env(instance_name)
    instance_name = booth_env.instance_name
    _ensure_live_env(env, booth_env)

    # Refuse to destroy a config that is still referenced anywhere.
    # TODO use constants in reports
    if resource.find_for_config(
        get_resources(env.get_cib()),
        booth_env.config_path,
    ):
        report_processor.report(
            ReportItem.error(
                reports.messages.BoothConfigIsUsed(
                    instance_name,
                    "in cluster resource",
                )
            )
        )
    # Only systemd is currently supported. Initd does not support multiple
    # instances (here specified by name)
    if external.is_systemctl():
        if external.is_service_running(
            env.cmd_runner(), "booth", instance_name
        ):
            report_processor.report(
                ReportItem.error(
                    reports.messages.BoothConfigIsUsed(
                        instance_name,
                        "(running in systemd)",
                    )
                )
            )
        if external.is_service_enabled(
            env.cmd_runner(), "booth", instance_name
        ):
            report_processor.report(
                ReportItem.error(
                    reports.messages.BoothConfigIsUsed(
                        instance_name,
                        "(enabled in systemd)",
                    )
                )
            )
    if report_processor.has_errors:
        raise LibraryError()

    # Load the config to find its authfile; load failures are forcible via
    # ignore_config_load_problems.
    try:
        authfile_path = None
        booth_conf = booth_env.config.read_to_facade()
        authfile_path = booth_conf.get_authfile()
    except RawFileError as e:
        report_processor.report(
            raw_file_error_report(
                e,
                force_code=report_codes.FORCE_BOOTH_DESTROY,
                is_forced_or_warning=ignore_config_load_problems,
            )
        )
    except ParserErrorException as e:
        report_processor.report_list(
            booth_env.config.parser_exception_to_report_list(
                e,
                force_code=report_codes.FORCE_BOOTH_DESTROY,
                is_forced_or_warning=ignore_config_load_problems,
            )
        )
    if report_processor.has_errors:
        raise LibraryError()

    if authfile_path:
        authfile_dir, authfile_name = os.path.split(authfile_path)
        # Only remove the key when it lives in the standard booth config
        # directory; keys elsewhere are left in place and only reported.
        if (authfile_dir == settings.booth_config_dir) and authfile_name:
            try:
                key_file = FileInstance.for_booth_key(authfile_name)
                key_file.raw_file.remove(fail_if_file_not_found=False)
            except RawFileError as e:
                report_processor.report(
                    raw_file_error_report(
                        e,
                        force_code=report_codes.FORCE_BOOTH_DESTROY,
                        is_forced_or_warning=ignore_config_load_problems,
                    )
                )
        else:
            report_processor.report(
                ReportItem.warning(
                    reports.messages.BoothUnsupportedFileLocation(
                        authfile_path,
                        settings.booth_config_dir,
                        file_type_codes.BOOTH_KEY,
                    )
                )
            )
    if report_processor.has_errors:
        raise LibraryError()

    # Finally remove the booth config file itself.
    try:
        booth_env.config.raw_file.remove()
    except RawFileError as e:
        report_processor.report(raw_file_error_report(e))
    if report_processor.has_errors:
        raise LibraryError()
def create_in_cluster(
    env: LibraryEnvironment,
    ip,
    instance_name=None,
    allow_absent_resource_agent=False,
):
    """
    Create group with ip resource and booth resource

    env -- provides all for communication with externals
    string ip -- float ip address for the operation of the booth
    string instance_name -- booth instance name
    bool allow_absent_resource_agent -- allowing creating booth resource even
        if its agent is not installed
    """
    report_processor = env.report_processor
    booth_env = env.get_booth_env(instance_name)
    # Booth config path goes to CIB. Working with mocked booth configs would
    # not work correctly as the path would point to a mock file (the path to a
    # mock file is unknown to us in the lib anyway)
    # It makes sense to work with a mocked CIB, though. Users can do other
    # changes to the CIB and push them to the cluster at once.
    _ensure_live_booth_env(booth_env)
    resources_section = get_resources(env.get_cib())
    id_provider = IdProvider(resources_section)
    instance_name = booth_env.instance_name

    # validate
    if resource.find_for_config(resources_section, booth_env.config_path):
        report_processor.report(
            ReportItem.error(reports.messages.BoothAlreadyInCib(instance_name))
        )
    # verify the config exists and is readable
    try:
        booth_env.config.raw_file.read()
    except RawFileError as e:
        report_processor.report(raw_file_error_report(e))
    if report_processor.has_errors:
        raise LibraryError()
    # validation done

    # Partials pre-bind the shared arguments so the two resource creations
    # below stay short and symmetrical.
    create_id = partial(
        resource.create_resource_id, resources_section, instance_name
    )
    get_agent = partial(
        find_valid_resource_agent_by_name,
        env.report_processor,
        env.cmd_runner(),
        allowed_absent=allow_absent_resource_agent,
    )
    create_primitive = partial(
        primitive.create, env.report_processor, resources_section, id_provider
    )
    into_booth_group = partial(
        group.place_resource,
        group.provide_group(resources_section, create_id("group")),
    )

    into_booth_group(
        create_primitive(
            create_id("ip"),
            get_agent("ocf:heartbeat:IPaddr2"),
            instance_attributes={"ip": ip},
        )
    )
    into_booth_group(
        create_primitive(
            create_id("service"),
            get_agent("ocf:pacemaker:booth-site"),
            instance_attributes={"config": booth_env.config_path},
        )
    )
    env.push_cib()
def full_cluster_status_plaintext(
    env: LibraryEnvironment,
    hide_inactive_resources: bool = False,
    verbose: bool = False,
) -> str:
    """
    Return full cluster status as plaintext

    env -- LibraryEnvironment
    hide_inactive_resources -- if True, do not display non-running resources
    verbose -- if True, display more info
    """
    # pylint: disable=too-many-branches
    # pylint: disable=too-many-locals
    # pylint: disable=too-many-statements

    # validation
    # CIB and corosync.conf must be both live or both mocked; a mix would
    # produce an inconsistent status view.
    if not env.is_cib_live and env.is_corosync_conf_live:
        raise LibraryError(
            ReportItem.error(
                reports.messages.LiveEnvironmentNotConsistent(
                    [file_type_codes.CIB],
                    [file_type_codes.COROSYNC_CONF],
                )
            )
        )
    if env.is_cib_live and not env.is_corosync_conf_live:
        raise LibraryError(
            ReportItem.error(
                reports.messages.LiveEnvironmentNotConsistent(
                    [file_type_codes.COROSYNC_CONF],
                    [file_type_codes.CIB],
                )
            )
        )

    # initialization
    runner = env.cmd_runner()
    report_processor = env.report_processor
    live = env.is_cib_live and env.is_corosync_conf_live
    is_sbd_running = False

    # load status, cib, corosync.conf
    status_text, warning_list = get_cluster_status_text(
        runner, hide_inactive_resources, verbose
    )
    corosync_conf = None
    # If we are live on a remote node, we have no corosync.conf.
    # TODO Use the new file framework so the path is not exposed.
    if not live or os.path.exists(settings.corosync_conf_file):
        corosync_conf = env.get_corosync_conf()
    cib = env.get_cib()
    if verbose:
        (
            ticket_status_text,
            ticket_status_stderr,
            ticket_status_retval,
        ) = get_ticket_status_text(runner)
    # get extra info if live
    if live:
        try:
            is_sbd_running = is_service_running(runner, get_sbd_service_name())
        except LibraryError:
            # best effort only; sbd state stays False when undetectable
            pass
        local_services_status = _get_local_services_status(runner)
        if verbose and corosync_conf:
            node_name_list, node_names_report_list = get_existing_nodes_names(
                corosync_conf
            )
            report_processor.report_list(node_names_report_list)
            node_reachability = _get_node_reachability(
                env.get_node_target_factory(),
                env.get_node_communicator(),
                report_processor,
                node_name_list,
            )

    # check stonith configuration
    warning_list = list(warning_list)
    warning_list.extend(_stonith_warnings(cib, is_sbd_running))

    # put it all together
    if report_processor.has_errors:
        raise LibraryError()

    cluster_name = (
        corosync_conf.get_cluster_name()
        if corosync_conf
        else nvpair.get_value(
            "cluster_property_set", get_crm_config(cib), "cluster-name", ""
        )
    )
    parts = []
    parts.append(f"Cluster name: {cluster_name}")
    if warning_list:
        parts.extend(["", "WARNINGS:"] + warning_list + [""])
    parts.append(status_text)
    if verbose:
        parts.extend(["", "Tickets:"])
        if ticket_status_retval != 0:
            ticket_warning_parts = [
                "WARNING: Unable to get information about tickets"
            ]
            if ticket_status_stderr:
                ticket_warning_parts.extend(
                    indent(ticket_status_stderr.splitlines())
                )
            parts.extend(indent(ticket_warning_parts))
        else:
            parts.extend(indent(ticket_status_text.splitlines()))
    if live:
        if verbose and corosync_conf:
            parts.extend(["", "PCSD Status:"])
            parts.extend(
                indent(
                    _format_node_reachability(node_name_list, node_reachability)
                )
            )
        parts.extend(["", "Daemon Status:"])
        parts.extend(
            indent(_format_local_services_status(local_services_status))
        )
    return "\n".join(parts)
def node_add_remote(
    env: LibraryEnvironment,
    node_name: str,
    node_addr: Optional[str],
    operations: Iterable[Mapping[str, str]],
    meta_attributes: Mapping[str, str],
    instance_attributes: Mapping[str, str],
    skip_offline_nodes: bool = False,
    allow_incomplete_distribution: bool = False,
    allow_pacemaker_remote_service_fail: bool = False,
    allow_invalid_operation: bool = False,
    allow_invalid_instance_attributes: bool = False,
    use_default_operations: bool = True,
    wait: WaitType = False,
):
    # pylint: disable=too-many-arguments
    # pylint: disable=too-many-branches
    # pylint: disable=too-many-locals
    # pylint: disable=too-many-statements
    """
    create an ocf:pacemaker:remote resource and use it as a remote node

    env -- provides all for communication with externals
    node_name -- the name of the new node
    node_addr -- the address of the new node or None for default
    operations -- attributes for each entered operation
    meta_attributes -- attributes for primitive/meta_attributes
    instance_attributes -- attributes for primitive/instance_attributes
    skip_offline_nodes -- if True, ignore when some nodes are offline
    allow_incomplete_distribution -- if True, allow this command to finish
        successfully even if file distribution did not succeed
    allow_pacemaker_remote_service_fail -- if True, allow this command to
        finish successfully even if starting/enabling pacemaker_remote did
        not succeed
    allow_invalid_operation -- if True, allow to use operations that are not
        listed in a resource agent metadata
    allow_invalid_instance_attributes -- if True, allow to use instance
        attributes that are not listed in a resource agent metadata and allow
        to omit required instance_attributes
    use_default_operations -- if True, add operations specified in a resource
        agent metadata to the resource
    wait -- a flag for controlling waiting for pacemaker idle mechanism
    """
    wait_timeout = env.ensure_wait_satisfiable(wait)
    report_processor = env.report_processor

    # operations may require a newer CIB schema (e.g. specific attributes)
    cib = env.get_cib(
        minimal_version=get_required_cib_version_for_primitive(operations)
    )
    id_provider = IdProvider(cib)
    if env.is_cib_live:
        corosync_conf: Optional[CorosyncConfigFacade] = env.get_corosync_conf()
    else:
        # without a live CIB we cannot reliably cross-check corosync nodes
        corosync_conf = None
        report_processor.report(
            ReportItem.info(
                reports.messages.CorosyncNodeConflictCheckSkipped(
                    reports.const.REASON_NOT_LIVE_CIB,
                )
            )
        )
    (
        existing_nodes_names,
        existing_nodes_addrs,
        report_list,
    ) = get_existing_nodes_names_addrs(corosync_conf, cib)
    if env.is_cib_live:
        # We just reported corosync checks are going to be skipped so we
        # shouldn't complain about errors related to corosync nodes
        report_processor.report_list(report_list)

    try:
        resource_agent_facade = ResourceAgentFacadeFactory(
            env.cmd_runner(), report_processor
        ).facade_from_parsed_name(remote_node.AGENT_NAME)
    except ResourceAgentError as e:
        report_processor.report(resource_agent_error_to_report_item(e))
        raise LibraryError() from e

    existing_target_list = []
    if env.is_cib_live:
        target_factory = env.get_node_target_factory()
        existing_target_list, new_target_list = _get_targets_for_add(
            target_factory,
            report_processor,
            existing_nodes_names,
            [node_name],
            skip_offline_nodes,
        )
        new_target = new_target_list[0] if new_target_list else None
        # default node_addr to an address from known-hosts
        if node_addr is None:
            if new_target:
                node_addr = new_target.first_addr
                node_addr_source = (
                    reports.const.DEFAULT_ADDRESS_SOURCE_KNOWN_HOSTS
                )
            else:
                node_addr = node_name
                node_addr_source = (
                    reports.const.DEFAULT_ADDRESS_SOURCE_HOST_NAME
                )
            report_processor.report(
                ReportItem.info(
                    reports.messages.UsingDefaultAddressForHost(
                        node_name, node_addr, node_addr_source
                    )
                )
            )
    else:
        # default node_addr to an address from known-hosts
        if node_addr is None:
            known_hosts = env.get_known_hosts([node_name])
            if known_hosts:
                node_addr = known_hosts[0].dest.addr
                node_addr_source = (
                    reports.const.DEFAULT_ADDRESS_SOURCE_KNOWN_HOSTS
                )
            else:
                node_addr = node_name
                node_addr_source = (
                    reports.const.DEFAULT_ADDRESS_SOURCE_HOST_NAME
                )
            report_processor.report(
                ReportItem.info(
                    reports.messages.UsingDefaultAddressForHost(
                        node_name, node_addr, node_addr_source
                    )
                )
            )

    # validate inputs
    report_list = remote_node.validate_create(
        existing_nodes_names,
        existing_nodes_addrs,
        resource_agent_facade.metadata,
        node_name,
        node_addr,
        instance_attributes,
    )
    if report_processor.report_list(report_list).has_errors:
        raise LibraryError()

    # validation + cib setup
    # TODO extract the validation to a separate function
    try:
        remote_resource_element = remote_node.create(
            env.report_processor,
            resource_agent_facade,
            get_resources(cib),
            id_provider,
            node_addr,
            node_name,
            operations,
            meta_attributes,
            instance_attributes,
            allow_invalid_operation,
            allow_invalid_instance_attributes,
            use_default_operations,
        )
    except LibraryError as e:
        # Check unique id conflict with check against nodes. Until validation
        # resource create is not separated, we need to make unique post
        # validation.
        already_exists = []
        unified_report_list = []
        for report_item in report_list + list(e.args):
            # pylint: disable=no-member
            dto_obj = report_item.message.to_dto()
            if dto_obj.code not in (
                reports.codes.ID_ALREADY_EXISTS,
                reports.codes.RESOURCE_INSTANCE_ATTR_VALUE_NOT_UNIQUE,
            ):
                # unrelated report, keep as-is
                unified_report_list.append(report_item)
            elif (
                "id" in dto_obj.payload
                and dto_obj.payload["id"] not in already_exists
            ):
                # keep only the first duplicate-id report for each id
                unified_report_list.append(report_item)
                already_exists.append(dto_obj.payload["id"])
        report_list = unified_report_list

    report_processor.report_list(report_list)
    if report_processor.has_errors:
        raise LibraryError()

    # everything validated, let's set it up
    if env.is_cib_live:
        _prepare_pacemaker_remote_environment(
            env,
            report_processor,
            existing_target_list,
            new_target,
            node_name,
            skip_offline_nodes,
            allow_incomplete_distribution,
            allow_pacemaker_remote_service_fail,
        )
    else:
        report_processor.report_list(
            _reports_skip_new_node(node_name, "not_live_cib")
        )

    env.push_cib(wait_timeout=wait_timeout)
    # wait_timeout >= 0 means waiting was requested (see
    # ensure_wait_satisfiable) — verify the new resource actually started
    if wait_timeout >= 0:
        _ensure_resource_running(env, remote_resource_element.attrib["id"])
def add_device(
    lib_env: LibraryEnvironment,
    model,
    model_options,
    generic_options,
    heuristics_options,
    force_model=False,
    force_options=False,
    skip_offline_nodes=False,
):
    # pylint: disable=too-many-locals
    """
    Add a quorum device to a cluster, distribute and reload configs if live

    The live flow is deliberately ordered: 1) validate, 2) set up qdevice
    certificates, 3) enable the corosync-qdevice service, 4) push the new
    corosync.conf, 5) start the service. If an early step fails, nodes never
    receive a corosync.conf referencing qdevice, so the cluster is unaffected.

    string model -- quorum device model
    dict model_options -- model specific options
    dict generic_options -- generic quorum device options
    dict heuristics_options -- heuristics options
    bool force_model -- continue even if the model is not valid
    bool force_options -- continue even if options are not valid
    bool skip_offline_nodes -- continue even if not all nodes are accessible

    Raises LibraryError when a quorum device is already defined or when
    validation produces errors.
    """
    cfg = lib_env.get_corosync_conf()
    if cfg.has_quorum_device():
        # only one quorum device per cluster is supported
        raise LibraryError(
            ReportItem.error(reports.messages.QdeviceAlreadyDefined())
        )

    report_processor = lib_env.report_processor
    report_processor.report_list(
        corosync_conf_validators.add_quorum_device(
            model,
            model_options,
            generic_options,
            heuristics_options,
            [node.nodeid for node in cfg.get_nodes()],
            force_model=force_model,
            force_options=force_options,
        )
    )

    if lib_env.is_corosync_conf_live:
        cluster_nodes_names, report_list = get_existing_nodes_names(
            cfg,
            # Pcs is unable to communicate with nodes missing names. It cannot
            # send new corosync.conf to them. That might break the cluster.
            # Hence we error out.
            error_on_missing_name=True,
        )
        report_processor.report_list(report_list)

    if report_processor.has_errors:
        raise LibraryError()

    cfg.add_quorum_device(
        model,
        model_options,
        generic_options,
        heuristics_options,
    )
    if cfg.is_quorum_device_heuristics_enabled_with_no_exec():
        # heuristics without any exec_* option have no effect; warn only
        lib_env.report_processor.report(
            ReportItem.warning(
                reports.messages.CorosyncQuorumHeuristicsEnabledWithNoExec()
            )
        )

    # First setup certificates for qdevice, then send corosync.conf to nodes.
    # If anything fails, nodes will not have corosync.conf with qdevice in it,
    # so there is no effect on the cluster.
    if lib_env.is_corosync_conf_live:
        target_factory = lib_env.get_node_target_factory()
        target_list = target_factory.get_target_list(
            cluster_nodes_names,
            skip_non_existing=skip_offline_nodes,
        )
        # Do model specific configuration.
        # If the model is not known to pcs and was forced, do not configure
        # anything else than corosync.conf, as we do not know what to do
        # anyway.
        if model == "net":
            qdevice_net.set_up_client_certificates(
                lib_env.cmd_runner(),
                lib_env.report_processor,
                lib_env.communicator_factory,
                # We are sure the "host" key is there, it has been validated
                # above.
                target_factory.get_target_from_hostname(model_options["host"]),
                cfg.get_cluster_name(),
                target_list,
                skip_offline_nodes,
            )

        lib_env.report_processor.report(
            ReportItem.info(
                reports.messages.ServiceActionStarted(
                    reports.const.SERVICE_ACTION_ENABLE, "corosync-qdevice"
                )
            )
        )
        com_cmd = qdevice_com.Enable(
            lib_env.report_processor, skip_offline_nodes
        )
        com_cmd.set_targets(target_list)
        run_and_raise(lib_env.get_node_communicator(), com_cmd)

    # everything set up, it's safe to tell the nodes to use qdevice
    lib_env.push_corosync_conf(cfg, skip_offline_nodes)

    # Now, when corosync.conf has been reloaded, we can start qdevice service.
    if lib_env.is_corosync_conf_live:
        lib_env.report_processor.report(
            ReportItem.info(
                reports.messages.ServiceActionStarted(
                    reports.const.SERVICE_ACTION_START, "corosync-qdevice"
                )
            )
        )
        com_cmd_start = qdevice_com.Start(
            lib_env.report_processor, skip_offline_nodes
        )
        com_cmd_start.set_targets(target_list)
        run_and_raise(lib_env.get_node_communicator(), com_cmd_start)
def full_cluster_status_plaintext(
    env: LibraryEnvironment,
    hide_inactive_resources: bool = False,
    verbose: bool = False,
) -> str:
    """
    Return full cluster status as plaintext

    Combines crm_mon output, stonith/SBD warnings and, in verbose live mode,
    ticket status, pcsd node reachability and local daemon status into one
    human-readable string.

    env -- LibraryEnvironment
    hide_inactive_resources -- if True, do not display non-running resources
    verbose -- if True, display more info

    Raises LibraryError when the environment is only partially live or when
    any error-level report was collected.
    """
    # pylint: disable=too-many-branches
    # pylint: disable=too-many-locals

    # validation: CIB and corosync.conf must be either both live or both
    # given as files, otherwise the combined status would be misleading
    if not env.is_cib_live and env.is_corosync_conf_live:
        raise LibraryError(
            reports.live_environment_not_consistent(
                [file_type_codes.CIB],
                [file_type_codes.COROSYNC_CONF],
            )
        )
    if env.is_cib_live and not env.is_corosync_conf_live:
        raise LibraryError(
            reports.live_environment_not_consistent(
                [file_type_codes.COROSYNC_CONF],
                [file_type_codes.CIB],
            )
        )

    # initialization
    runner = env.cmd_runner()
    report_processor = SimpleReportProcessor(env.report_processor)
    live = env.is_cib_live and env.is_corosync_conf_live
    # default to False; only probed when live (see below)
    is_sbd_running = False

    # load status, cib, corosync.conf
    status_text, warning_list = get_cluster_status_text(
        runner, hide_inactive_resources, verbose
    )
    # NOTE(review): corosync.conf is loaded unconditionally here; on a live
    # remote node without corosync.conf this presumably fails — confirm
    # whether that case is reachable through this code path
    corosync_conf = env.get_corosync_conf()
    cib = env.get_cib()
    if verbose:
        # ticket_* names are only bound when verbose; the rendering code
        # below reads them only under the same condition
        ticket_status_text, ticket_status_stderr, ticket_status_retval = (
            get_ticket_status_text(runner)
        )
    # get extra info if live
    if live:
        try:
            is_sbd_running = is_service_running(runner, get_sbd_service_name())
        except LibraryError:
            # best effort: inability to query SBD must not break status output
            pass
        local_services_status = _get_local_services_status(runner)
        if verbose:
            node_name_list, node_names_report_list = get_existing_nodes_names(
                corosync_conf
            )
            report_processor.report_list(node_names_report_list)
            node_reachability = _get_node_reachability(
                env.get_node_target_factory(),
                env.get_node_communicator(),
                report_processor,
                node_name_list,
            )

    # check stonith configuration
    warning_list = list(warning_list)
    warning_list.extend(_stonith_warnings(cib, is_sbd_running))

    # put it all together
    if report_processor.has_errors:
        raise LibraryError()

    parts = []
    parts.append(f"Cluster name: {corosync_conf.get_cluster_name()}")
    if warning_list:
        parts.extend(["", "WARNINGS:"] + warning_list + [""])
    parts.append(status_text)
    if verbose:
        parts.extend(["", "Tickets:"])
        if ticket_status_retval != 0:
            ticket_warning_parts = [
                "WARNING: Unable to get information about tickets"
            ]
            if ticket_status_stderr:
                ticket_warning_parts.extend(
                    indent(ticket_status_stderr.splitlines())
                )
            parts.extend(indent(ticket_warning_parts))
        else:
            parts.extend(indent(ticket_status_text.splitlines()))
    if live:
        if verbose:
            parts.extend(["", "PCSD Status:"])
            parts.extend(
                indent(
                    _format_node_reachability(node_name_list, node_reachability)
                )
            )
        parts.extend(["", "Daemon Status:"])
        parts.extend(
            indent(_format_local_services_status(local_services_status))
        )
    return "\n".join(parts)