def create_in_cluster(
    env: LibraryEnvironment,
    ip,
    instance_name=None,
    allow_absent_resource_agent=False,
):
    """
    Create a group with an ip resource and a booth resource in the CIB.

    env -- provides all for communication with externals
    string ip -- floating ip address for the operation of the booth
    string instance_name -- booth instance name
    bool allow_absent_resource_agent -- allowing creating booth resource even
        if its agent is not installed
    """
    report_processor = env.report_processor
    booth_env = env.get_booth_env(instance_name)
    # Booth config path goes to CIB. Working with a mocked booth configs would
    # not work correctly as the path would point to a mock file (the path to a
    # mock file is unknown to us in the lib anyway)
    # It makes sense to work with a mocked CIB, though. Users can do other
    # changes to the CIB and push them to the cluster at once.
    _ensure_live_booth_env(booth_env)
    resources_section = get_resources(env.get_cib())
    id_provider = IdProvider(resources_section)
    # normalize: use the name resolved by the booth environment
    instance_name = booth_env.instance_name

    # validate
    if resource.find_for_config(resources_section, booth_env.config_path):
        report_processor.report(
            ReportItem.error(reports.messages.BoothAlreadyInCib(instance_name))
        )
    # verify the config exists and is readable
    try:
        booth_env.config.raw_file.read()
    except RawFileError as e:
        report_processor.report(raw_file_error_report(e))
    if report_processor.has_errors:
        raise LibraryError()
    # validation done

    # Helpers pre-bound to the common arguments; create_id derives unique ids
    # from the booth instance name.
    create_id = partial(
        resource.create_resource_id, resources_section, instance_name
    )
    get_agent = partial(
        find_valid_resource_agent_by_name,
        env.report_processor,
        env.cmd_runner(),
        allowed_absent=allow_absent_resource_agent,
    )
    create_primitive = partial(
        primitive.create, env.report_processor, resources_section, id_provider
    )
    into_booth_group = partial(
        group.place_resource,
        group.provide_group(resources_section, create_id("group")),
    )

    # ip resource first, then the booth service resource, both in the group
    into_booth_group(
        create_primitive(
            create_id("ip"),
            get_agent("ocf:heartbeat:IPaddr2"),
            instance_attributes={"ip": ip},
        )
    )
    into_booth_group(
        create_primitive(
            create_id("service"),
            get_agent("ocf:pacemaker:booth-site"),
            instance_attributes={"config": booth_env.config_path},
        )
    )
    env.push_cib()
def raiser(dummy_communicator, dummy_reporter, dummy_nodes, dummy_force):
    """
    Test helper: unconditionally fail with a 'corosync not running' report.

    All arguments are ignored; the signature only mirrors the callable that
    this helper stands in for.
    """
    failure_report = reports.corosync_not_running_check_node_error("test node")
    raise LibraryError(failure_report)
def report_keyfile_io_error(file_path, operation, e):
    """
    Build a LibraryError describing an I/O failure on a booth key file.

    file_path -- path of the key file the operation failed on
    operation -- the file operation that failed
    e -- the original environment error
    """
    io_report = common_reports.file_io_error(
        file_role=env_file_role_codes.BOOTH_KEY,
        file_path=file_path,
        operation=operation,
        reason=format_environment_error(e),
    )
    return LibraryError(io_report)
def _prepare_pacemaker_remote_environment(
    env,
    report_processor,
    existing_nodes_target_list,
    new_node_target,
    new_node_name,
    skip_offline_nodes,
    allow_incomplete_distribution,
    allow_fails,
):
    """
    Prepare a new node to run pacemaker_remote.

    Checks the new node is online, distributes the pacemaker authkey
    (generating a new one if none exists locally) and starts + enables the
    pacemaker_remote service on the new node.

    env -- provides all for communication with externals
    report_processor -- tool for warning/info/error reporting
    existing_nodes_target_list -- targets of nodes already in the cluster
    new_node_target -- communication target of the new node, may be None
    new_node_name -- name of the new node, used in skip reports
    skip_offline_nodes -- if True, do not treat unreachable nodes as errors
    allow_incomplete_distribution -- if True, tolerate authkey delivery fails
    allow_fails -- if True, tolerate pacemaker_remote start/enable fails
    """
    # figure out whether the new node is reachable at all
    if new_node_target:
        com_cmd = GetOnlineTargets(
            report_processor,
            ignore_offline_targets=skip_offline_nodes,
        )
        com_cmd.set_targets([new_node_target])
        online_new_target_list = run_com(env.get_node_communicator(), com_cmd)
        if not online_new_target_list and not skip_offline_nodes:
            raise LibraryError()
    else:
        online_new_target_list = []

    # check new nodes
    if online_new_target_list:
        com_cmd = GetHostInfo(report_processor)
        com_cmd.set_targets(online_new_target_list)
        report_processor.report_list(
            _host_check_remote_node(
                run_com(env.get_node_communicator(), com_cmd)))
        if report_processor.has_errors:
            raise LibraryError()
    else:
        # unreachable new node: report the steps being skipped instead
        report_processor.report_list(
            _reports_skip_new_node(new_node_name, "unreachable"))

    # share pacemaker authkey
    authkey_file = FileInstance.for_pacemaker_key()
    try:
        if authkey_file.raw_file.exists():
            # key already exists locally, only the new node needs it
            authkey_content = authkey_file.read_raw()
            authkey_targets = online_new_target_list
        else:
            # no key yet: generate one and send it to every node
            authkey_content = generate_binary_key(
                random_bytes_count=settings.pacemaker_authkey_bytes)
            authkey_targets = (
                existing_nodes_target_list + online_new_target_list)
    except RawFileError as e:
        report_processor.report(raw_file_error_report(e))
    # a RawFileError above leaves authkey_targets unbound, but it also
    # produces an error report, so has_errors raises before the name is used
    if report_processor.has_errors:
        raise LibraryError()
    if authkey_targets:
        com_cmd = DistributeFiles(
            report_processor,
            node_communication_format.pcmk_authkey_file(authkey_content),
            skip_offline_targets=skip_offline_nodes,
            allow_fails=allow_incomplete_distribution,
        )
        com_cmd.set_targets(authkey_targets)
        run_and_raise(env.get_node_communicator(), com_cmd)

    # start and enable pacemaker_remote
    if online_new_target_list:
        com_cmd = ServiceAction(
            report_processor,
            node_communication_format.create_pcmk_remote_actions([
                "start",
                "enable",
            ]),
            allow_fails=allow_fails,
        )
        com_cmd.set_targets(online_new_target_list)
        run_and_raise(env.get_node_communicator(), com_cmd)
def node_add_remote(
    env: LibraryEnvironment,
    node_name: str,
    node_addr: Optional[str],
    operations: Iterable[Mapping[str, str]],
    meta_attributes: Mapping[str, str],
    instance_attributes: Mapping[str, str],
    skip_offline_nodes: bool = False,
    allow_incomplete_distribution: bool = False,
    allow_pacemaker_remote_service_fail: bool = False,
    allow_invalid_operation: bool = False,
    allow_invalid_instance_attributes: bool = False,
    use_default_operations: bool = True,
    wait: WaitType = False,
):
    # pylint: disable=too-many-arguments
    # pylint: disable=too-many-branches
    # pylint: disable=too-many-locals
    # pylint: disable=too-many-statements
    """
    create an ocf:pacemaker:remote resource and use it as a remote node

    env -- provides all for communication with externals
    node_name -- the name of the new node
    node_addr -- the address of the new node or None for default
    operations -- attributes for each entered operation
    meta_attributes -- attributes for primitive/meta_attributes
    instance_attributes -- attributes for primitive/instance_attributes
    skip_offline_nodes -- if True, ignore when some nodes are offline
    allow_incomplete_distribution -- if True, allow this command to finish
        successfully even if file distribution did not succeed
    allow_pacemaker_remote_service_fail -- if True, allow this command to
        finish successfully even if starting/enabling pacemaker_remote did
        not succeed
    allow_invalid_operation -- if True, allow to use operations that are not
        listed in a resource agent metadata
    allow_invalid_instance_attributes -- if True, allow to use instance
        attributes that are not listed in a resource agent metadata and allow
        to omit required instance_attributes
    use_default_operations -- if True, add operations specified in a resource
        agent metadata to the resource
    wait -- a flag for controlling waiting for pacemaker idle mechanism
    """
    env.ensure_wait_satisfiable(wait)
    report_processor = env.report_processor
    cib = env.get_cib(
        minimal_version=get_required_cib_version_for_primitive(operations))
    id_provider = IdProvider(cib)
    if env.is_cib_live:
        corosync_conf: Optional[CorosyncConfigFacade] = env.get_corosync_conf()
    else:
        corosync_conf = None
        report_processor.report(
            ReportItem.info(
                reports.messages.CorosyncNodeConflictCheckSkipped(
                    reports.const.REASON_NOT_LIVE_CIB,
                )))
    (
        existing_nodes_names,
        existing_nodes_addrs,
        report_list,
    ) = get_existing_nodes_names_addrs(corosync_conf, cib)
    if env.is_cib_live:
        # We just reported corosync checks are going to be skipped so we
        # shouldn't complain about errors related to corosync nodes
        report_processor.report_list(report_list)

    resource_agent = remote_node.get_agent(
        env.report_processor, env.cmd_runner())

    existing_target_list = []
    if env.is_cib_live:
        # live CIB: resolve communication targets for all nodes
        target_factory = env.get_node_target_factory()
        existing_target_list, new_target_list = _get_targets_for_add(
            target_factory,
            report_processor,
            existing_nodes_names,
            [node_name],
            skip_offline_nodes,
        )
        new_target = new_target_list[0] if new_target_list else None
        # default node_addr to an address from known-hosts
        if node_addr is None:
            if new_target:
                node_addr = new_target.first_addr
                node_addr_source = (
                    reports.const.DEFAULT_ADDRESS_SOURCE_KNOWN_HOSTS)
            else:
                node_addr = node_name
                node_addr_source = (
                    reports.const.DEFAULT_ADDRESS_SOURCE_HOST_NAME)
            report_processor.report(
                ReportItem.info(
                    reports.messages.UsingDefaultAddressForHost(
                        node_name, node_addr, node_addr_source)))
    else:
        # default node_addr to an address from known-hosts
        if node_addr is None:
            known_hosts = env.get_known_hosts([node_name])
            if known_hosts:
                node_addr = known_hosts[0].dest.addr
                node_addr_source = (
                    reports.const.DEFAULT_ADDRESS_SOURCE_KNOWN_HOSTS)
            else:
                node_addr = node_name
                node_addr_source = (
                    reports.const.DEFAULT_ADDRESS_SOURCE_HOST_NAME)
            report_processor.report(
                ReportItem.info(
                    reports.messages.UsingDefaultAddressForHost(
                        node_name, node_addr, node_addr_source)))

    # validate inputs
    report_list = remote_node.validate_create(
        existing_nodes_names,
        existing_nodes_addrs,
        resource_agent,
        node_name,
        node_addr,
        instance_attributes,
    )
    if report_processor.report_list(report_list).has_errors:
        raise LibraryError()
    # validation + cib setup
    # TODO extract the validation to a separate function
    try:
        remote_resource_element = remote_node.create(
            env.report_processor,
            resource_agent,
            get_resources(cib),
            id_provider,
            node_addr,
            node_name,
            operations,
            meta_attributes,
            instance_attributes,
            allow_invalid_operation,
            allow_invalid_instance_attributes,
            use_default_operations,
        )
    except LibraryError as e:
        # Check unique id conflict with check against nodes. Until validation
        # resource create is not separated, we need to make unique post
        # validation.
        # Deduplicate id-conflict reports coming from both the earlier
        # validation and the failed create call.
        already_exists = []
        unified_report_list = []
        for report_item in report_list + list(e.args):
            # pylint: disable=no-member
            dto_obj = report_item.message.to_dto()
            if dto_obj.code not in (
                reports.codes.ID_ALREADY_EXISTS,
                reports.codes.RESOURCE_INSTANCE_ATTR_VALUE_NOT_UNIQUE,
            ):
                unified_report_list.append(report_item)
            elif (
                "id" in dto_obj.payload
                and dto_obj.payload["id"] not in already_exists
            ):
                unified_report_list.append(report_item)
                already_exists.append(dto_obj.payload["id"])
        report_list = unified_report_list

    report_processor.report_list(report_list)
    if report_processor.has_errors:
        raise LibraryError()

    # everything validated, let's set it up
    if env.is_cib_live:
        _prepare_pacemaker_remote_environment(
            env,
            report_processor,
            existing_target_list,
            new_target,
            node_name,
            skip_offline_nodes,
            allow_incomplete_distribution,
            allow_pacemaker_remote_service_fail,
        )
    else:
        report_processor.report_list(
            _reports_skip_new_node(node_name, "not_live_cib"))

    env.push_cib(wait=wait)
    if wait:
        _ensure_resource_running(env, remote_resource_element.attrib["id"])
def remove_device(lib_env: LibraryEnvironment, skip_offline_nodes=False):
    """
    Stop using quorum device, distribute and reload configs if live

    skip_offline_nodes -- continue even if not all nodes are accessible
    """
    cfg = lib_env.get_corosync_conf()
    if not cfg.has_quorum_device():
        raise LibraryError(
            ReportItem.error(reports.messages.QdeviceNotDefined())
        )
    # remember the model before removing the device so model-specific cleanup
    # can run below
    model = cfg.get_quorum_device_model()
    cfg.remove_quorum_device()

    if lib_env.is_corosync_conf_live:
        report_processor = lib_env.report_processor
        # get nodes for communication
        cluster_nodes_names, report_list = get_existing_nodes_names(
            cfg,
            # Pcs is unable to communicate with nodes missing names. It cannot
            # send new corosync.conf to them. That might break the cluster.
            # Hence we error out.
            error_on_missing_name=True,
        )
        if report_processor.report_list(report_list).has_errors:
            raise LibraryError()
        target_list = lib_env.get_node_target_factory().get_target_list(
            cluster_nodes_names,
            skip_non_existing=skip_offline_nodes,
        )
        # fix quorum options for SBD to work properly
        if sbd.atb_has_to_be_enabled(lib_env.cmd_runner(), cfg):
            lib_env.report_processor.report(
                ReportItem.warning(
                    reports.messages.CorosyncQuorumAtbWillBeEnabledDueToSbd()
                )
            )
            cfg.set_quorum_options({"auto_tie_breaker": "1"})

        # disable qdevice
        lib_env.report_processor.report(
            ReportItem.info(
                reports.messages.ServiceActionStarted(
                    reports.const.SERVICE_ACTION_DISABLE, "corosync-qdevice"
                )
            )
        )
        com_cmd_disable = qdevice_com.Disable(
            lib_env.report_processor, skip_offline_nodes
        )
        com_cmd_disable.set_targets(target_list)
        run_and_raise(lib_env.get_node_communicator(), com_cmd_disable)
        # stop qdevice
        lib_env.report_processor.report(
            ReportItem.info(
                reports.messages.ServiceActionStarted(
                    reports.const.SERVICE_ACTION_STOP, "corosync-qdevice"
                )
            )
        )
        com_cmd_stop = qdevice_com.Stop(
            lib_env.report_processor, skip_offline_nodes
        )
        com_cmd_stop.set_targets(target_list)
        run_and_raise(lib_env.get_node_communicator(), com_cmd_stop)
        # handle model specific configuration
        if model == "net":
            lib_env.report_processor.report(
                ReportItem.info(
                    reports.messages.QdeviceCertificateRemovalStarted()
                )
            )
            com_cmd_client_destroy = qdevice_net_com.ClientDestroy(
                lib_env.report_processor, skip_offline_nodes
            )
            com_cmd_client_destroy.set_targets(target_list)
            run_and_raise(
                lib_env.get_node_communicator(), com_cmd_client_destroy
            )

    # distribute the updated corosync.conf (or just write it when not live)
    lib_env.push_corosync_conf(cfg, skip_offline_nodes)
def _defaults_update(
    env: LibraryEnvironment,
    cib_section_name: str,
    nvset_id: Optional[str],
    nvpairs: Mapping[str, str],
    pcs_command: reports.types.PcsCommand,
) -> None:
    """
    Update an nvset of defaults in the specified CIB section.

    env -- provides all for communication with externals
    cib_section_name -- name of the CIB section holding the defaults
    nvset_id -- id of the nvset to update; None triggers the legacy
        "first and only nvset" behavior
    nvpairs -- name/value pairs to set; an empty value removes the pair
    pcs_command -- identifies the calling command in reports
    """
    cib = env.get_cib()
    id_provider = IdProvider(cib)

    if nvset_id is None:
        # Backward compatibility code to support an old use case where no id
        # was requested and provided and the first meta_attributes nvset was
        # created / updated. However, we check that there is only one nvset
        # present in the CIB to prevent breaking the configuration with
        # multiple nvsets in place.
        # This is to be supported as it provides means of easily managing
        # defaults if only one set of defaults is needed.
        # TODO move this to a separate lib command.
        if not nvpairs:
            return
        # Do not create new defaults element if we are only removing values
        # from it.
        only_removing = True
        for value in nvpairs.values():
            if value != "":
                only_removing = False
                break
        if only_removing and not sections.exists(cib, cib_section_name):
            # nothing to remove from; warn and bail out without touching CIB
            env.report_processor.report(
                ReportItem.warning(reports.messages.DefaultsCanBeOverriden()))
            return

        nvset_elements = nvpair_multi.find_nvsets(
            sections.get(cib, cib_section_name))
        if len(nvset_elements) > 1:
            # ambiguous without an explicit nvset id
            env.report_processor.report(
                reports.item.ReportItem.error(
                    reports.messages.CibNvsetAmbiguousProvideNvsetId(
                        pcs_command)))
            raise LibraryError()
        env.report_processor.report(
            ReportItem.warning(reports.messages.DefaultsCanBeOverriden()))
        if len(nvset_elements) == 1:
            nvpair_multi.nvset_update(nvset_elements[0], id_provider, nvpairs)
        elif only_removing:
            # do not create new nvset if there is none and we are only
            # removing nvpairs
            return
        else:
            nvpair_multi.nvset_append_new(
                sections.get(cib, cib_section_name),
                id_provider,
                get_pacemaker_version_by_which_cib_was_validated(cib),
                nvpair_multi.NVSET_META,
                nvpairs,
                {},
            )
        env.push_cib()
        return

    # explicit nvset id: look it up and update it
    nvset_elements, report_list = nvpair_multi.find_nvsets_by_ids(
        sections.get(cib, cib_section_name), [nvset_id])
    if env.report_processor.report_list(report_list).has_errors:
        raise LibraryError()
    nvpair_multi.nvset_update(nvset_elements[0], id_provider, nvpairs)
    env.report_processor.report(
        ReportItem.warning(reports.messages.DefaultsCanBeOverriden()))
    env.push_cib()
def ensure_wait_for_idle_support(runner):
    """Raise a LibraryError unless the runner supports waiting for idle."""
    if has_wait_for_idle_support(runner):
        return
    raise LibraryError(reports.wait_for_idle_not_supported())
def get_cib(xml):
    """
    Parse a CIB XML string into an etree document.

    xml -- string containing the CIB XML
    Raise LibraryError with an "invalid format" report when the XML cannot be
    parsed or does not validate.
    """
    try:
        return parse_cib_xml(xml)
    except (etree.XMLSyntaxError, etree.DocumentInvalid) as e:
        # chain the original parser error so debugging keeps the root cause
        raise LibraryError(reports.cib_load_error_invalid_format()) from e
def get_cib(xml):
    """
    Parse a CIB XML string into an etree document.

    xml -- string containing the CIB XML
    Raise LibraryError with a CibLoadErrorBadFormat report (carrying the
    parser's message) when the XML cannot be parsed or does not validate.
    """
    try:
        return parse_cib_xml(xml)
    except (etree.XMLSyntaxError, etree.DocumentInvalid) as e:
        # chain the original parser error so debugging keeps the root cause
        raise LibraryError(
            ReportItem.error(reports.messages.CibLoadErrorBadFormat(str(e)))
        ) from e
def ensure_wait_for_idle_support(runner):
    """Raise a LibraryError unless the runner supports waiting for idle."""
    if has_wait_for_idle_support(runner):
        return
    raise LibraryError(
        ReportItem.error(reports.messages.WaitForIdleNotSupported())
    )
def send_all_config_to_node( communicator, reporter, target_list, rewrite_existing=False, skip_wrong_config=False, ): """ Send all booth configs from default booth config directory and theri authfiles to specified node. communicator -- NodeCommunicator reporter -- report processor target_list list -- list of targets to which configs should be delivered rewrite_existing -- if True rewrite existing file skip_wrong_config -- if True skip local configs that are unreadable """ # TODO adapt to new file transfer framework once it is written # TODO the function is not modular enough - it raises LibraryError file_list = [] for conf_file_name in sorted(config_files.get_all_configs_file_names()): config_file = FileInstance.for_booth_config(conf_file_name) try: booth_conf_data = config_file.raw_file.read() ( authfile_name, authfile_data, authfile_report_list, ) = config_files.get_authfile_name_and_data( config_file.raw_to_facade(booth_conf_data) ) reporter.report_list(authfile_report_list) file_list.append( { "name": conf_file_name, "data": booth_conf_data.decode("utf-8"), "is_authfile": False, } ) if authfile_name and authfile_data: file_list.append( { "name": authfile_name, "data": base64.b64encode(authfile_data).decode("utf-8"), "is_authfile": True, } ) except RawFileError as e: reporter.report( raw_file_error_report( e, force_code=report_codes.FORCE, is_forced_or_warning=skip_wrong_config, ) ) except ParserErrorException as e: reporter.report_list( config_file.parser_exception_to_report_list( e, force_code=report_codes.FORCE, is_forced_or_warning=skip_wrong_config, ) ) if reporter.has_errors: raise LibraryError() if not file_list: # no booth configs exist, nothing to be synced return reporter.report( ReportItem.info(reports.messages.BoothConfigDistributionStarted()) ) com_cmd = BoothSaveFiles( reporter, file_list, rewrite_existing=rewrite_existing ) com_cmd.set_targets(target_list) run(communicator, com_cmd) if reporter.has_errors: raise LibraryError()
def pull_config(env: LibraryEnvironment, node_name, instance_name=None):
    """
    Get config from specified node and save it on local system. It will
    rewrite existing files.

    env -- provides all for communication with externals
    string node_name -- name of the node from which the config should be
        fetched
    string instance_name -- booth instance name
    """
    report_processor = env.report_processor
    booth_env = env.get_booth_env(instance_name)
    instance_name = booth_env.instance_name
    _ensure_live_env(env, booth_env)

    env.report_processor.report(
        ReportItem.info(
            reports.messages.BoothFetchingConfigFromNode(
                node_name,
                config=instance_name,
            )
        )
    )
    com_cmd = BoothGetConfig(env.report_processor, instance_name)
    com_cmd.set_targets(
        [env.get_node_target_factory().get_target_from_hostname(node_name)]
    )
    # pylint: disable=unsubscriptable-object
    # In general, pylint is right. And it cannot know in this case code is OK.
    # It is covered by tests.
    output = run_and_raise(env.get_node_communicator(), com_cmd)[0][1]
    try:
        # TODO adapt to new file transfer framework once it is written
        if (
            output["authfile"]["name"] is not None
            and output["authfile"]["data"]
        ):
            authfile_name = output["authfile"]["name"]
            # the authfile name is validated the same way as an instance name
            report_list = config_validators.check_instance_name(authfile_name)
            if report_list:
                raise LibraryError(*report_list)
            booth_key = FileInstance.for_booth_key(authfile_name)
            booth_key.write_raw(
                base64.b64decode(output["authfile"]["data"].encode("utf-8")),
                can_overwrite=True,
            )
        booth_env.config.write_raw(
            output["config"]["data"].encode("utf-8"), can_overwrite=True
        )
        env.report_processor.report(
            ReportItem.info(
                reports.messages.BoothConfigAcceptedByNode(
                    name_list=[instance_name]
                )
            )
        )
    except RawFileError as e:
        report_processor.report(raw_file_error_report(e))
    except KeyError:
        # the remote node's response did not have the expected structure
        raise LibraryError(
            ReportItem.error(reports.messages.InvalidResponseFormat(node_name))
        )

    if report_processor.has_errors:
        raise LibraryError()
def config_sync(
    env: LibraryEnvironment,
    instance_name=None,
    skip_offline_nodes=False,
):
    """
    Send specified local booth configuration to all nodes in the local
    cluster.

    env -- provides all for communication with externals
    string instance_name -- booth instance name
    skip_offline_nodes -- if True offline nodes will be skipped
    """
    report_processor = env.report_processor
    booth_env = env.get_booth_env(instance_name)
    if not env.is_cib_live:
        raise LibraryError(
            ReportItem.error(
                reports.messages.LiveEnvironmentRequired([file_type_codes.CIB])
            )
        )

    cluster_nodes_names, report_list = get_existing_nodes_names(
        env.get_corosync_conf()
    )
    if not cluster_nodes_names:
        report_list.append(
            ReportItem.error(reports.messages.CorosyncConfigNoNodesDefined())
        )
    report_processor.report_list(report_list)

    try:
        booth_conf_data = booth_env.config.read_raw()
        booth_conf = booth_env.config.raw_to_facade(booth_conf_data)
        if isinstance(booth_env.config.raw_file, GhostFile):
            # ghost (in-memory) files: the key content is supplied directly,
            # only its basename is derived from the config
            authfile_data = booth_env.key.read_raw()
            authfile_path = booth_conf.get_authfile()
            authfile_name = (
                os.path.basename(authfile_path) if authfile_path else None
            )
        else:
            (
                authfile_name,
                authfile_data,
                authfile_report_list,
            ) = config_files.get_authfile_name_and_data(booth_conf)
            report_processor.report_list(authfile_report_list)
    except RawFileError as e:
        report_processor.report(raw_file_error_report(e))
    except ParserErrorException as e:
        report_processor.report_list(
            booth_env.config.parser_exception_to_report_list(e)
        )
    # errors above also guarantee the authfile_* names are bound below
    if report_processor.has_errors:
        raise LibraryError()

    com_cmd = BoothSendConfig(
        env.report_processor,
        booth_env.instance_name,
        booth_conf_data,
        authfile=authfile_name,
        authfile_data=authfile_data,
        skip_offline_targets=skip_offline_nodes,
    )
    com_cmd.set_targets(
        env.get_node_target_factory().get_target_list(
            cluster_nodes_names,
            skip_non_existing=skip_offline_nodes,
        )
    )
    run_and_raise(env.get_node_communicator(), com_cmd)
def remove_levels_by_params(
    lib_env: LibraryEnvironment,
    level=None,
    target_type=None,
    target_value=None,
    devices=None,
    ignore_if_missing=False,
    target_may_be_a_device=False,
):
    """
    Remove specified fencing level(s).

    LibraryEnvironment lib_env -- environment
    int|string level -- level (index) of the fencing level to remove
    constant target_type -- the removed fencing level target value type
    mixed target_value -- the removed fencing level target value
    Iterable devices -- list of stonith devices of the removed fencing level
    bool ignore_if_missing -- when True, do not report if level not found
    target_may_be_a_device -- enables backward compatibility mode for old CLI
    """
    topology_el = get_fencing_topology(lib_env.get_cib())
    report_list = cib_fencing_topology.remove_levels_by_params(
        topology_el,
        level,
        target_type,
        target_value,
        devices,
        ignore_if_missing,
    )

    if not target_may_be_a_device or target_type != TARGET_TYPE_NODE:
        # normal mode: report everything and push on success
        if lib_env.report_processor.report_list(report_list).has_errors:
            raise LibraryError()
        lib_env.push_cib()
        return

    # backward compatibility mode
    # CLI command parameters are: level, node, stonith, stonith... Both the
    # node and the stonith list are optional. If the node is omitted and the
    # stonith list is present, there is no way to figure it out, since there
    # is no specification of what the parameter is. Hence the pre-lib code
    # tried both. First it assumed the first parameter is a node. If that
    # fence level didn't exist, it assumed the first parameter is a device.
    # Since it was only possible to specify node as a target back then, this
    # is enabled only in that case.
    # CLI has no way to figure out what the first parameter is. Therefore,
    # the lib must try both cases if asked to do so.
    if not report.has_errors(report_list):
        # the "target is a node" interpretation succeeded
        lib_env.report_processor.report_list(report_list)
        lib_env.push_cib()
        return

    # retry as a device only when the failure was "level does not exist"
    level_not_found = False
    for report_item in report_list:
        if (
            report_item.message.code
            == report.codes.CIB_FENCING_LEVEL_DOES_NOT_EXIST
        ):
            level_not_found = True
            break
    if not level_not_found:
        lib_env.report_processor.report_list(report_list)
        raise LibraryError()

    # second attempt: treat target_value as one of the devices
    target_and_devices = [target_value]
    if devices:
        target_and_devices.extend(devices)
    report_list_second = cib_fencing_topology.remove_levels_by_params(
        topology_el, level, None, None, target_and_devices, ignore_if_missing
    )
    if not report.has_errors(report_list_second):
        lib_env.report_processor.report_list(report_list_second)
        lib_env.push_cib()
        return

    # both interpretations failed; report both sets of errors
    lib_env.report_processor.report_list(report_list)
    lib_env.report_processor.report_list(report_list_second)
    raise LibraryError()
def test_file_error(self, mock_config):
    """A config read failure surfaces as an UNABLE_TO_GET_SBD_CONFIG error."""
    mock_config.side_effect = LibraryError(
        ReportItem.error(report_codes.UNABLE_TO_GET_SBD_CONFIG, ))
    expected_report = (
        Severities.ERROR,
        report_codes.UNABLE_TO_GET_SBD_CONFIG,
        {},
    )
    assert_raise_library_error(
        lambda: cmd_sbd.get_local_sbd_config(self.mock_env),
        expected_report,
    )
def _check_model(model):
    """Raise a LibraryError unless the qdevice model is "net"."""
    if model == "net":
        return
    raise LibraryError(
        ReportItem.error(
            reports.messages.InvalidOptionValue("model", model, ["net"])
        )
    )
def send(self):
    """
    Send queued items without printing errors; raise them as a LibraryError
    only when raise_on_errors is enabled.
    """
    collected_errors = self._send(self.items, print_errors=False)
    if self.raise_on_errors and collected_errors:
        raise LibraryError(*collected_errors)
def run(self, args, stdin_string=None, env_extend=None, binary_output=False):
    """
    Run an external command and return its (stdout, stderr, return code).

    args -- command and its arguments as a list
    stdin_string -- data passed to the command on stdin; None means no stdin
    env_extend -- mapping of extra/overriding environment variables
    binary_output -- if True, do not decode stdout/stderr to str
    Raise LibraryError when the process cannot be executed at all.
    """
    # Allow overriding default settings. If a piece of code really wants to
    # set own PATH or CIB_file, we must allow it. I.e. it wants to run
    # a pacemaker tool on a CIB in a file but cannot afford the risk of
    # changing the CIB in the file specified by the user.
    env_vars = self._env_vars.copy()
    env_vars.update(dict(env_extend) if env_extend else dict())

    # log the full command, environment and stdin before executing
    log_args = " ".join([shell_quote(x) for x in args])
    self._logger.debug(
        "Running: {args}\nEnvironment:{env_vars}{stdin_string}".format(
            args=log_args,
            stdin_string=("" if not stdin_string else (
                "\n--Debug Input Start--\n{0}\n--Debug Input End--".format(
                    stdin_string))),
            env_vars=("" if not env_vars else ("\n" + "\n".join([
                " {0}={1}".format(key, val)
                for key, val in sorted(env_vars.items())
            ])))))
    self._reporter.report(
        reports.run_external_process_started(
            log_args, stdin_string, env_vars))

    try:
        # pylint: disable=subprocess-popen-preexec-fn
        # this is OK as pcs is only single-threaded application
        process = subprocess.Popen(
            args,
            # Some commands react differently if they get anything via stdin
            stdin=(subprocess.PIPE if stdin_string is not None
                   else subprocess.DEVNULL),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            # restore default SIGPIPE handling in the child process
            preexec_fn=(
                lambda: signal.signal(signal.SIGPIPE, signal.SIG_DFL)),
            close_fds=True,
            shell=False,
            env=env_vars,
            # decodes newlines and in python3 also converts bytes to str
            universal_newlines=(not binary_output))
        out_std, out_err = process.communicate(stdin_string)
        retval = process.returncode
    except OSError as e:
        raise LibraryError(
            reports.run_external_process_error(log_args, e.strerror))

    # log and report the command's outcome
    self._logger.debug(
        ("Finished running: {args}\nReturn value: {retval}"
            + "\n--Debug Stdout Start--\n{out_std}\n--Debug Stdout End--"
            + "\n--Debug Stderr Start--\n{out_err}\n--Debug Stderr End--"
        ).format(
            args=log_args, retval=retval, out_std=out_std, out_err=out_err))
    self._reporter.report(
        reports.run_external_process_finished(
            log_args, retval, out_std, out_err))
    return out_std, out_err, retval
def full_cluster_status_plaintext(
    env: LibraryEnvironment,
    hide_inactive_resources: bool = False,
    verbose: bool = False,
) -> str:
    """
    Return full cluster status as plaintext

    env -- LibraryEnvironment
    hide_inactive_resources -- if True, do not display non-running resources
    verbose -- if True, display more info
    """
    # pylint: disable=too-many-branches
    # pylint: disable=too-many-locals
    # pylint: disable=too-many-statements

    # validation
    # CIB and corosync.conf must both be live or both be provided as files
    if not env.is_cib_live and env.is_corosync_conf_live:
        raise LibraryError(
            ReportItem.error(
                reports.messages.LiveEnvironmentNotConsistent(
                    [file_type_codes.CIB],
                    [file_type_codes.COROSYNC_CONF],
                )))
    if env.is_cib_live and not env.is_corosync_conf_live:
        raise LibraryError(
            ReportItem.error(
                reports.messages.LiveEnvironmentNotConsistent(
                    [file_type_codes.COROSYNC_CONF],
                    [file_type_codes.CIB],
                )))

    # initialization
    runner = env.cmd_runner()
    report_processor = env.report_processor
    live = env.is_cib_live and env.is_corosync_conf_live
    is_sbd_running = False

    # load status, cib, corosync.conf
    status_text, warning_list = get_cluster_status_text(
        runner, hide_inactive_resources, verbose)
    corosync_conf = None
    # If we are live on a remote node, we have no corosync.conf.
    # TODO Use the new file framework so the path is not exposed.
    if not live or os.path.exists(settings.corosync_conf_file):
        corosync_conf = env.get_corosync_conf()
    cib = env.get_cib()
    if verbose:
        (
            ticket_status_text,
            ticket_status_stderr,
            ticket_status_retval,
        ) = get_ticket_status_text(runner)
    # get extra info if live
    if live:
        try:
            # best effort only; SBD may legitimately be unavailable
            is_sbd_running = is_service_running(runner, get_sbd_service_name())
        except LibraryError:
            pass
        local_services_status = _get_local_services_status(runner)
        if verbose and corosync_conf:
            node_name_list, node_names_report_list = get_existing_nodes_names(
                corosync_conf)
            report_processor.report_list(node_names_report_list)
            node_reachability = _get_node_reachability(
                env.get_node_target_factory(),
                env.get_node_communicator(),
                report_processor,
                node_name_list,
            )

    # check stonith configuration
    warning_list = list(warning_list)
    warning_list.extend(_stonith_warnings(cib, is_sbd_running))

    # put it all together
    if report_processor.has_errors:
        raise LibraryError()

    # cluster name from corosync.conf when available, else from CIB properties
    cluster_name = (
        corosync_conf.get_cluster_name()
        if corosync_conf
        else nvpair.get_value(
            "cluster_property_set", get_crm_config(cib), "cluster-name", ""))
    parts = []
    parts.append(f"Cluster name: {cluster_name}")
    if warning_list:
        parts.extend(["", "WARNINGS:"] + warning_list + [""])
    parts.append(status_text)
    if verbose:
        # ticket_status_* are bound above because verbose is unchanged
        parts.extend(["", "Tickets:"])
        if ticket_status_retval != 0:
            ticket_warning_parts = [
                "WARNING: Unable to get information about tickets"
            ]
            if ticket_status_stderr:
                ticket_warning_parts.extend(
                    indent(ticket_status_stderr.splitlines()))
            parts.extend(indent(ticket_warning_parts))
        else:
            parts.extend(indent(ticket_status_text.splitlines()))
    if live:
        if verbose and corosync_conf:
            parts.extend(["", "PCSD Status:"])
            parts.extend(
                indent(
                    _format_node_reachability(
                        node_name_list, node_reachability)))
        parts.extend(["", "Daemon Status:"])
        parts.extend(
            indent(_format_local_services_status(local_services_status)))
    return "\n".join(parts)
def _store_to_tmpfile(data, report_item_message):
    """
    Write data to a binary temp file, wrapping OS failures in LibraryError.

    data -- bytes to store
    report_item_message -- report message built with the OS error text
    """
    try:
        return write_tmpfile(data, binary=True)
    except EnvironmentError as env_error:
        error_report = ReportItem.error(
            report_item_message(env_error.strerror)
        )
        raise LibraryError(error_report) from env_error
def validate_id_does_not_exist(tree, id):
    """
    Raise a LibraryError when the given id is already used in the cib.

    tree -- cib etree node
    id -- id to check for uniqueness
    """
    # pylint: disable=redefined-builtin
    if not does_id_exist(tree, id):
        return
    raise LibraryError(reports.id_already_exists(id))
def _ensure_resource_running(env: LibraryEnvironment, resource_id):
    """Raise a LibraryError when the resource is not running in the cluster."""
    running_report = state.ensure_resource_running(
        env.get_cluster_state(), resource_id
    )
    if env.report_processor.report(running_report).has_errors:
        raise LibraryError()
def test_remove_empty_clone_fail(self):
    # Moving the only member out of a cloned/mastered group would leave the
    # group empty; group_add must refuse and report an error per group.
    constraints_before = """
        <constraints>
            <rsc_order first="RM1-group-master" first-action="start"
                id="order-RM1-group-master-R1-mandatory"
                then="R1" then-action="start"/>
            <rsc_order first="RC1-group-clone" first-action="start"
                id="order-RC1-group-clone-R1-mandatory"
                then="R2" then-action="start"/>
        </constraints>
    """
    resources_before = """
        <resources>
            <primitive id="R1" />
            <primitive id="R2" />
            <clone id="RC1-group-clone">
                <group id="RC1-group">
                    <primitive id="RC1" />
                </group>
            </clone>
            <master id="RM1-group-master">
                <group id="RM1-group">
                    <primitive id="RM1" />
                </group>
            </master>
        </resources>
    """
    # the CIB pcs would push before the (mocked) push failure
    resources_after = """
        <resources>
            <primitive id="R1"></primitive>
            <primitive id="R2"></primitive>
            <group id="G">
                <primitive id="RC1"></primitive>
                <primitive id="RM1"></primitive>
            </group>
        </resources>
    """
    # mock the CIB push to fail so the command ends with an error
    self.config.runner.cib.load(
        resources=resources_before,
        constraints=constraints_before
    ).env.push_cib(
        exception=LibraryError(
            ReportItem.error(
                report_messages.CibPushError("stderr", "stdout"))),
        resources=resources_after,
        constraints=constraints_before,
    )
    self.env_assist.assert_raise_library_error(lambda: resource.group_add(
        self.env_assist.get_env(),
        "G",
        ["RC1", "RM1"],
    ))
    # one "cannot leave group empty" error per affected group is expected
    self.env_assist.assert_reports([
        fixture.error(
            report_codes.CANNOT_LEAVE_GROUP_EMPTY_AFTER_MOVE,
            group_id="RC1-group",
            inner_resource_ids=["RC1"],
        ),
        fixture.error(
            report_codes.CANNOT_LEAVE_GROUP_EMPTY_AFTER_MOVE,
            group_id="RM1-group",
            inner_resource_ids=["RM1"],
        ),
    ])
def node_add_guest(
    env,
    node_name,
    resource_id,
    options,
    skip_offline_nodes=False,
    allow_incomplete_distribution=False,
    allow_pacemaker_remote_service_fail=False,
    wait=False,
):
    # pylint: disable=too-many-branches
    # pylint: disable=too-many-locals
    # pylint: disable=too-many-statements
    """
    Make a guest node from the specified resource

    LibraryEnvironment env -- provides all for communication with externals
    string node_name -- name of the guest node
    string resource_id -- specifies resource that should become a guest node
    dict options -- guest node options (remote-port, remote-addr,
        remote-connect-timeout)
    bool skip_offline_nodes -- if True, ignore when some nodes are offline
    bool allow_incomplete_distribution -- if True, allow this command to
        finish successfully even if file distribution did not succeed
    bool allow_pacemaker_remote_service_fail -- if True, allow this command
        to finish successfully even if starting/enabling pacemaker_remote
        did not succeed
    mixed wait -- a flag for controlling waiting for pacemaker idle mechanism
    """
    env.ensure_wait_satisfiable(wait)
    report_processor = env.report_processor
    cib = env.get_cib()
    id_provider = IdProvider(cib)
    if env.is_cib_live:
        corosync_conf = env.get_corosync_conf()
    else:
        # Working on a CIB file only: there is no corosync.conf to check
        # node name/address conflicts against, so tell the user the check
        # is being skipped.
        corosync_conf = None
        report_processor.report(
            ReportItem.info(
                reports.messages.CorosyncNodeConflictCheckSkipped(
                    reports.const.REASON_NOT_LIVE_CIB,
                )))
    (
        existing_nodes_names,
        existing_nodes_addrs,
        report_list,
    ) = get_existing_nodes_names_addrs(corosync_conf, cib)
    if env.is_cib_live:
        # We just reported corosync checks are going to be skipped so we
        # shouldn't complain about errors related to corosync nodes
        report_processor.report_list(report_list)

    existing_target_list = []
    if env.is_cib_live:
        target_factory = env.get_node_target_factory()
        existing_target_list, new_target_list = _get_targets_for_add(
            target_factory,
            report_processor,
            existing_nodes_names,
            [node_name],
            skip_offline_nodes,
        )
        new_target = new_target_list[0] if new_target_list else None
        # default remote-addr to an address from known-hosts
        if "remote-addr" not in options or options["remote-addr"] is None:
            if new_target:
                new_addr = new_target.first_addr
                new_addr_source = (
                    reports.const.DEFAULT_ADDRESS_SOURCE_KNOWN_HOSTS)
            else:
                # no known-hosts entry -> fall back to the node name itself
                new_addr = node_name
                new_addr_source = reports.const.DEFAULT_ADDRESS_SOURCE_HOST_NAME
            options["remote-addr"] = new_addr
            report_processor.report(
                ReportItem.info(
                    reports.messages.UsingDefaultAddressForHost(
                        node_name, new_addr, new_addr_source)))
    else:
        # default remote-addr to an address from known-hosts
        # Non-live CIB: no communication targets exist, so look the address
        # up in known-hosts directly.
        if "remote-addr" not in options or options["remote-addr"] is None:
            known_hosts = env.get_known_hosts([node_name])
            if known_hosts:
                new_addr = known_hosts[0].dest.addr
                new_addr_source = (
                    reports.const.DEFAULT_ADDRESS_SOURCE_KNOWN_HOSTS)
            else:
                new_addr = node_name
                new_addr_source = reports.const.DEFAULT_ADDRESS_SOURCE_HOST_NAME
            options["remote-addr"] = new_addr
            report_processor.report(
                ReportItem.info(
                    reports.messages.UsingDefaultAddressForHost(
                        node_name, new_addr, new_addr_source)))

    # validate inputs
    report_list = guest_node.validate_set_as_guest(cib, existing_nodes_names,
                                                   existing_nodes_addrs,
                                                   node_name, options)
    searcher = ElementSearcher(primitive.TAG, resource_id, get_resources(cib))
    if searcher.element_found():
        resource_element = searcher.get_element()
        # an already-guest resource must not become a guest node again
        report_list.extend(guest_node.validate_is_not_guest(resource_element))
    else:
        report_list.extend(searcher.get_errors())
    report_processor.report_list(report_list)
    if report_processor.has_errors:
        raise LibraryError()

    # everything validated, let's set it up
    guest_node.set_as_guest(
        resource_element,
        id_provider,
        node_name,
        options.get("remote-addr", None),
        options.get("remote-port", None),
        options.get("remote-connect-timeout", None),
    )

    if env.is_cib_live:
        # distribute auth files and start/enable pacemaker_remote;
        # new_target is only defined in the live branch above
        _prepare_pacemaker_remote_environment(
            env,
            report_processor,
            existing_target_list,
            new_target,
            node_name,
            skip_offline_nodes,
            allow_incomplete_distribution,
            allow_pacemaker_remote_service_fail,
        )
    else:
        report_processor.report_list(
            _reports_skip_new_node(node_name, "not_live_cib"))

    env.push_cib(wait=wait)
    if wait:
        _ensure_resource_running(env, resource_id)
def stonith_level_clear_cmd(lib, argv, modifiers):
    """
    Remove fencing levels: all of them when no arguments are given,
    otherwise those matching the specified target or stonith devices.

    Options:
      * -f - CIB file
    """
    modifiers.ensure_only_supported("-f")

    # No arguments: wipe the whole fencing topology.
    if not argv:
        lib.fencing_topology.remove_all_levels()
        return

    keywords = {"target", "stonith"}
    uses_new_syntax = len(argv) > 1 or (len(argv) == 1 and argv[0] in keywords)
    if uses_new_syntax:
        (
            target_type,
            target_value,
            devices,
        ) = _stonith_level_parse_target_and_stonith(argv)
        if devices is not None and target_value is not None:
            raise CmdLineInputError(
                "Only one of 'target' and 'stonith' can be used")
        lib.fencing_topology.remove_levels_by_params(
            None,
            target_type,
            target_value,
            devices,
        )
        return

    # TODO remove, deprecated backward compatibility mode for old syntax
    # The old syntax took: node, stonith-list — both optional. With the node
    # omitted there is no way to tell which parameter was given, so the
    # pre-lib code tried both interpretations, deleting all levels having
    # the first parameter as either a node or a device list. Only node
    # targets could be specified back then, so the second attempt is limited
    # to that case.
    deprecation_warning(
        "Syntax 'pcs stonith level clear [<target> | <stonith id(s)>] is "
        "deprecated and will be removed. Please use 'pcs stonith level clear "
        "[target <target>] | [stonith <stonith id>...]'.")
    target_type, target_value = _stonith_level_parse_node(argv[0])
    removal_failed = False
    try:
        lib.fencing_topology.remove_levels_by_params(
            None,
            target_type,
            target_value,
            None,
            # pre-lib code didn't return any error when no level was found
            ignore_if_missing=True,
        )
    except LibraryError:
        removal_failed = True
    if target_type == TARGET_TYPE_NODE:
        try:
            lib.fencing_topology.remove_levels_by_params(
                None,
                None,
                None,
                argv[0].split(","),
                # pre-lib code didn't return any error when no level was found
                ignore_if_missing=True,
            )
        except LibraryError:
            removal_failed = True
    if removal_failed:
        raise LibraryError()
def command_expect_live_env(self):
    """
    Raise LibraryError unless the booth environment is live, i.e. neither
    the booth config nor the booth key file is mocked.
    """
    if self.__config.is_live:
        return
    raise LibraryError(
        common_reports.live_environment_required([
            "BOOTH_CONF",
            "BOOTH_KEY",
        ])
    )
def create(
    report_processor: ReportProcessor,
    resources_section,
    id_provider,
    resource_id,
    resource_agent,
    raw_operation_list=None,
    meta_attributes=None,
    instance_attributes=None,
    allow_invalid_operation=False,
    allow_invalid_instance_attributes=False,
    use_default_operations=True,
    resource_type="resource",
    do_not_report_instance_attribute_server_exists=False  # TODO remove this arg
):
    # pylint: disable=too-many-arguments
    """
    Prepare all parts of primitive resource and append it into cib.

    report_processor is a tool for warning/info/error reporting
    etree.Element resources_section is place where new element will be
        appended
    IdProvider id_provider -- elements' ids generator
    string resource_id is id of new resource
    lib.resource_agent.CrmAgent resource_agent
    list of dict raw_operation_list specifies operations of resource
    dict meta_attributes specifies meta attributes of resource
    dict instance_attributes specifies instance attributes of resource
    bool allow_invalid_operation is flag for skipping validation of
        operations
    bool allow_invalid_instance_attributes is flag for skipping validation
        of instance_attributes
    bool use_default_operations is flag for completion operations with
        default actions specified in resource agent
    string resource_type -- describes the resource for reports
    bool do_not_report_instance_attribute_server_exists -- dirty fix due to
        suboptimal architecture, TODO: fix the architecture and remove the
        param
    """
    # Mutable defaults are created per call, not shared across calls.
    if raw_operation_list is None:
        raw_operation_list = []
    if meta_attributes is None:
        meta_attributes = {}
    if instance_attributes is None:
        instance_attributes = {}

    if does_id_exist(resources_section, resource_id):
        raise LibraryError(reports.id_already_exists(resource_id))
    validate_id(resource_id, "{0} name".format(resource_type))

    # Build the operation list: user-supplied operations merged with the
    # agent's default actions (all of them unless use_default_operations
    # is False, in which case only the necessary ones).
    operation_list = prepare_operations(
        report_processor,
        raw_operation_list,
        resource_agent.get_cib_default_actions(
            necessary_only=not use_default_operations
        ),
        [operation["name"] for operation in resource_agent.get_actions()],
        allow_invalid=allow_invalid_operation,
    )

    # Validate instance attributes against the agent metadata; errors stop
    # the command unless forced.
    if report_processor.report_list(
        validate_resource_instance_attributes_create(
            resource_agent,
            instance_attributes,
            resources_section,
            force=allow_invalid_instance_attributes,
            do_not_report_instance_attribute_server_exists=(
                do_not_report_instance_attribute_server_exists
            )
        )
    ).has_errors:
        raise LibraryError()

    # All validated: build the primitive element and append it to the CIB.
    return append_new(
        resources_section,
        id_provider,
        resource_id,
        resource_agent.get_standard(),
        resource_agent.get_provider(),
        resource_agent.get_type(),
        instance_attributes=instance_attributes,
        meta_attributes=meta_attributes,
        operation_list=operation_list
    )
def add_device(
    lib_env: LibraryEnvironment,
    model,
    model_options,
    generic_options,
    heuristics_options,
    force_model=False,
    force_options=False,
    skip_offline_nodes=False,
):
    # pylint: disable=too-many-locals
    """
    Add a quorum device to a cluster, distribute and reload configs if live

    string model -- quorum device model
    dict model_options -- model specific options
    dict generic_options -- generic quorum device options
    dict heuristics_options -- heuristics options
    bool force_model -- continue even if the model is not valid
    bool force_options -- continue even if options are not valid
    bool skip_offline_nodes -- continue even if not all nodes are accessible
    """
    cfg = lib_env.get_corosync_conf()
    # Only one quorum device per cluster is allowed.
    if cfg.has_quorum_device():
        raise LibraryError(
            ReportItem.error(reports.messages.QdeviceAlreadyDefined()))

    report_processor = lib_env.report_processor
    report_processor.report_list(
        corosync_conf_validators.add_quorum_device(
            model,
            model_options,
            generic_options,
            heuristics_options,
            [node.nodeid for node in cfg.get_nodes()],
            force_model=force_model,
            force_options=force_options,
        ))

    if lib_env.is_corosync_conf_live:
        cluster_nodes_names, report_list = get_existing_nodes_names(
            cfg,
            # Pcs is unable to communicate with nodes missing names. It cannot
            # send new corosync.conf to them. That might break the cluster.
            # Hence we error out.
            error_on_missing_name=True,
        )
        report_processor.report_list(report_list)

    if report_processor.has_errors:
        raise LibraryError()

    cfg.add_quorum_device(
        model,
        model_options,
        generic_options,
        heuristics_options,
    )
    if cfg.is_quorum_device_heuristics_enabled_with_no_exec():
        # Heuristics with no exec_* option configured do nothing; warn.
        lib_env.report_processor.report(
            ReportItem.warning(
                reports.messages.CorosyncQuorumHeuristicsEnabledWithNoExec()))

    # First setup certificates for qdevice, then send corosync.conf to nodes.
    # If anything fails, nodes will not have corosync.conf with qdevice in it,
    # so there is no effect on the cluster.
    if lib_env.is_corosync_conf_live:
        target_factory = lib_env.get_node_target_factory()
        target_list = target_factory.get_target_list(
            cluster_nodes_names,
            skip_non_existing=skip_offline_nodes,
        )
        # Do model specific configuration.
        # If the model is not known to pcs and was forced, do not configure
        # anything else than corosync.conf, as we do not know what to do
        # anyway.
        if model == "net":
            qdevice_net.set_up_client_certificates(
                lib_env.cmd_runner(),
                lib_env.report_processor,
                lib_env.communicator_factory,
                # We are sure the "host" key is there, it has been validated
                # above.
                target_factory.get_target_from_hostname(model_options["host"]),
                cfg.get_cluster_name(),
                target_list,
                skip_offline_nodes,
            )

        # Enable (but do not start yet) corosync-qdevice on all nodes.
        lib_env.report_processor.report(
            ReportItem.info(
                reports.messages.ServiceActionStarted(
                    reports.const.SERVICE_ACTION_ENABLE, "corosync-qdevice")))
        com_cmd = qdevice_com.Enable(lib_env.report_processor,
                                     skip_offline_nodes)
        com_cmd.set_targets(target_list)
        run_and_raise(lib_env.get_node_communicator(), com_cmd)

    # everything set up, it's safe to tell the nodes to use qdevice
    lib_env.push_corosync_conf(cfg, skip_offline_nodes)

    # Now, when corosync.conf has been reloaded, we can start qdevice service.
    if lib_env.is_corosync_conf_live:
        lib_env.report_processor.report(
            ReportItem.info(
                reports.messages.ServiceActionStarted(
                    reports.const.SERVICE_ACTION_START, "corosync-qdevice")))
        com_cmd_start = qdevice_com.Start(lib_env.report_processor,
                                          skip_offline_nodes)
        com_cmd_start.set_targets(target_list)
        run_and_raise(lib_env.get_node_communicator(), com_cmd_start)
def config_destroy(
    env: LibraryEnvironment,
    instance_name: Optional[str] = None,
    ignore_config_load_problems: bool = False,
) -> None:
    # pylint: disable=too-many-branches
    """
    remove booth configuration files

    env
    instance_name -- booth instance name
    ignore_config_load_problems -- delete as much as possible when unable to
        read booth configs for the given booth instance
    """
    report_processor = env.report_processor
    booth_env = env.get_booth_env(instance_name)
    found_instance_name = booth_env.instance_name
    _ensure_live_env(env, booth_env)

    # Refuse to destroy a config that is referenced by a cluster resource.
    booth_resource_list = resource.find_for_config(
        get_resources(env.get_cib()),
        booth_env.config_path,
    )
    if booth_resource_list:
        report_processor.report(
            ReportItem.error(
                reports.messages.BoothConfigIsUsed(
                    found_instance_name,
                    reports.const.BOOTH_CONFIG_USED_IN_CLUSTER_RESOURCE,
                    resource_name=booth_resource_list[0].get("id", ""),
                )
            )
        )
    # Only systemd is currently supported. Initd does not support multiple
    # instances (here specified by name)
    if external.is_systemctl():
        # Refuse while the instance's systemd service is running or enabled.
        if external.is_service_running(
            env.cmd_runner(), "booth", found_instance_name
        ):
            report_processor.report(
                ReportItem.error(
                    reports.messages.BoothConfigIsUsed(
                        found_instance_name,
                        reports.const.BOOTH_CONFIG_USED_RUNNING_IN_SYSTEMD,
                    )
                )
            )
        if external.is_service_enabled(
            env.cmd_runner(), "booth", found_instance_name
        ):
            report_processor.report(
                ReportItem.error(
                    reports.messages.BoothConfigIsUsed(
                        found_instance_name,
                        reports.const.BOOTH_CONFIG_USED_ENABLED_IN_SYSTEMD,
                    )
                )
            )
    if report_processor.has_errors:
        raise LibraryError()

    # Read the config to find the authfile so it can be removed as well.
    # Load problems are forcible via ignore_config_load_problems.
    try:
        authfile_path = None
        booth_conf = booth_env.config.read_to_facade()
        authfile_path = booth_conf.get_authfile()
    except RawFileError as e:
        report_processor.report(
            raw_file_error_report(
                e,
                force_code=report_codes.FORCE_BOOTH_DESTROY,
                is_forced_or_warning=ignore_config_load_problems,
            )
        )
    except ParserErrorException as e:
        report_processor.report_list(
            booth_env.config.parser_exception_to_report_list(
                e,
                force_code=report_codes.FORCE_BOOTH_DESTROY,
                is_forced_or_warning=ignore_config_load_problems,
            )
        )
    if report_processor.has_errors:
        raise LibraryError()

    if authfile_path:
        authfile_dir, authfile_name = os.path.split(authfile_path)
        # Only delete key files that live in pcs's booth config directory;
        # anything elsewhere is left alone and merely reported.
        if (authfile_dir == settings.booth_config_dir) and authfile_name:
            try:
                key_file = FileInstance.for_booth_key(authfile_name)
                key_file.raw_file.remove(fail_if_file_not_found=False)
            except RawFileError as e:
                report_processor.report(
                    raw_file_error_report(
                        e,
                        force_code=report_codes.FORCE_BOOTH_DESTROY,
                        is_forced_or_warning=ignore_config_load_problems,
                    )
                )
        else:
            report_processor.report(
                ReportItem.warning(
                    reports.messages.BoothUnsupportedFileLocation(
                        authfile_path,
                        settings.booth_config_dir,
                        file_type_codes.BOOTH_KEY,
                    )
                )
            )
    if report_processor.has_errors:
        raise LibraryError()

    # Finally remove the booth config file itself.
    try:
        booth_env.config.raw_file.remove()
    except RawFileError as e:
        report_processor.report(raw_file_error_report(e))
    if report_processor.has_errors:
        raise LibraryError()