def _validate_network_options_update(bundle_el, network_el, options, force_options):
    """
    Validate an update of a bundle's network options.

    bundle_el -- bundle element being updated
    network_el -- network element of the bundle
    options -- requested network option changes (empty value removes an option)
    force_options -- if True, forceable errors are reported as warnings
    """
    report_list = []
    inner_primitive = get_inner_resource(bundle_el)
    if (
        inner_primitive is not None
        and
        not _is_pcmk_remote_acccessible_after_update(network_el, options)
    ):
        report_list.append(
            reports.get_problem_creator(
                report_codes.FORCE_OPTIONS, force_options
            )(
                reports.resource_in_bundle_not_accessible,
                bundle_el.get("id"),
                inner_primitive.get("id"),
            )
        )

    severity_kwargs = validate.set_warning(
        report_codes.FORCE_OPTIONS, force_options
    )
    optional_value_validators = [
        # TODO add validators for other keys (ip-range-start - IPv4)
        validate.ValuePortNumber("control-port"),
        # Leaving a possibility to force this validation for the case pacemaker
        # starts supporting IPv6 or other format of the netmask.
        ValueHostNetmask("host-netmask", **severity_kwargs),
    ]
    # an empty value means the option is being removed, so it is always valid
    for value_validator in optional_value_validators:
        value_validator.empty_string_valid = True

    all_validators = [
        validate.NamesIn(
            # allow to remove options even if they are not allowed
            NETWORK_OPTIONS | _options_to_remove(options),
            option_type="network",
            **severity_kwargs
        )
    ] + optional_value_validators
    return (
        report_list
        +
        validate.ValidatorAll(all_validators).validate(options)
    )
def _validate_network_options_update(bundle_el, network_el, options, force_options):
    """
    Validate an update of a bundle's network options.

    bundle_el -- bundle element being updated
    network_el -- network element of the bundle
    options -- requested network option changes (empty value removes an option)
    force_options -- if True, forceable errors are reported as warnings
    """
    report_list = []
    inner_primitive = get_inner_resource(bundle_el)
    if (
        inner_primitive is not None
        and
        not _is_pcmk_remote_acccessible_after_update(network_el, options)
    ):
        report_list.append(
            reports.get_problem_creator(
                report_codes.FORCE_OPTIONS, force_options
            )(
                reports.resource_in_bundle_not_accessible,
                bundle_el.get("id"),
                inner_primitive.get("id"),
            )
        )

    value_validators = [
        # TODO add validators for other keys (ip-range-start - IPv4)
        validate.value_empty_or_valid(
            "control-port",
            validate.value_port_number("control-port"),
        ),
        validate.value_empty_or_valid(
            "host-netmask",
            _value_host_netmask("host-netmask", force_options),
        ),
    ]
    return (
        report_list
        +
        validate.run_collection_of_option_validators(options, value_validators)
        +
        validate.names_in(
            # allow to remove options even if they are not allowed
            _network_options | _options_to_remove(options),
            options.keys(),
            "network",
            report_codes.FORCE_OPTIONS,
            force_options,
        )
    )
def names_in(allowed_name_list, name_list, option_type="option", code_to_allow_extra_names=None, allow_extra_names=False):
    """
    Return a list with an INVALID_OPTION report when name_list contains a name
    which is not in allowed_name_list; return an empty list otherwise.

    list allowed_name_list -- names which are valid
    list name_list -- names to be validated
    string option_type -- describes the option type for reporting purposes
    string code_to_allow_extra_names -- code for forcing invalid names; when
        None, INVALID_OPTION is a non-forceable error, otherwise it is a
        forceable error or a warning
    bool allow_extra_names -- complements code_to_allow_extra_names and
        determines whether INVALID_OPTION is a forceable error or a warning
    """
    unknown_names = set(name_list) - set(allowed_name_list)
    if not unknown_names:
        return []
    make_report = reports.get_problem_creator(
        code_to_allow_extra_names, allow_extra_names
    )
    return [
        make_report(
            reports.invalid_option,
            sorted(unknown_names),
            sorted(allowed_name_list),
            option_type,
        )
    ]
def _find_resources_to_remove(
    cib,
    report_processor: ReportProcessor,
    node_type,
    node_identifier,
    allow_remove_multiple_nodes,
    find_resources
):
    """
    Find resources matching node_identifier using the find_resources callback.

    Raise LibraryError when no resource matches. When more than one matches,
    report it (forceable via allow_remove_multiple_nodes) and raise on error.
    """
    matching_elements = find_resources(get_resources(cib), node_identifier)
    if not matching_elements:
        raise LibraryError(reports.node_not_found(node_identifier, node_type))

    if len(matching_elements) > 1:
        multiple_report = reports.get_problem_creator(
            report_codes.FORCE_REMOVE_MULTIPLE_NODES,
            allow_remove_multiple_nodes
        )(
            reports.multiple_result_found,
            "resource",
            [element.attrib["id"] for element in matching_elements],
            node_identifier
        )
        if report_processor.report(multiple_report).has_errors:
            raise LibraryError()
    return matching_elements
def exception_to_report_list(
    exception: JsonParserException,
    file_type_code: code.FileTypeCode,
    file_path: str,
    force_code: str,  # TODO: fix
    is_forced_or_warning: bool
) -> ReportItemList:
    """
    Translate a JSON parser exception into a report list.

    Re-raise the exception when it cannot be translated.
    """
    report_creator = reports.get_problem_creator(
        force_code=force_code, is_forced=is_forced_or_warning
    )
    if isinstance(exception, JsonParserException):
        if isinstance(exception.json_exception, json.JSONDecodeError):
            json_error = exception.json_exception
            return [
                report_creator(
                    reports.parse_error_json_file,
                    file_type_code,
                    json_error.lineno,
                    json_error.colno,
                    json_error.pos,
                    json_error.msg,
                    str(json_error),
                    file_path=file_path,
                )
            ]
    raise exception
def _validate_guest_change(tree, existing_nodes_names, existing_nodes_addrs, meta_attributes, allow_not_suitable_command, detect_remove=False):
    """
    Validate a change of guest node meta attributes; return a report list.

    Reports (forceably) that a dedicated node add/remove-guest command should
    be used instead, plus any conflicts of the guest node with existing nodes.
    """
    if not resource.guest_node.is_node_name_in_options(meta_attributes):
        return []

    node_name = resource.guest_node.get_node_name_from_options(meta_attributes)

    # decide which command the user should have used instead
    suggested_command = reports.use_command_node_add_guest
    if (
        detect_remove
        and
        not resource.guest_node.get_guest_option_value(meta_attributes)
    ):
        suggested_command = reports.use_command_node_remove_guest

    report_list = [
        reports.get_problem_creator(
            report_codes.FORCE_NOT_SUITABLE_COMMAND,
            allow_not_suitable_command
        )(suggested_command)
    ]
    report_list.extend(
        resource.guest_node.validate_conflicts(
            tree, existing_nodes_names, existing_nodes_addrs, node_name,
            meta_attributes
        )
    )
    return report_list
def validate(self, option_dict):
    """
    Validate option names in option_dict.

    Return a report list: a forceable INVALID_OPTIONS report for unknown
    names and a plain (never forceable) INVALID_OPTIONS report for banned
    names.
    """
    present_names = set(option_dict.keys())
    banned_names = set()
    # banned names are split out only when this validator can produce
    # forceable errors / warnings at all
    if not (self._code_for_warning is None and not self._produce_warning):
        banned_names = present_names & self._banned_name_set
    unknown_names = present_names - set(self._option_name_list) - banned_names

    report_list = []
    create_report = reports.get_problem_creator(
        self._code_for_warning, self._produce_warning
    )
    if unknown_names:
        report_list.append(
            create_report(
                reports.invalid_options,
                unknown_names,
                self._option_name_list,
                self._option_type,
                allowed_option_patterns=self._allowed_option_patterns,
            )
        )
    if banned_names:
        report_list.append(
            reports.invalid_options(
                banned_names,
                self._option_name_list,
                self._option_type,
            )
        )
    return report_list
def names_in(
    allowed_name_list, name_list, option_type="option",
    code_to_allow_extra_names=None, allow_extra_names=False
):
    """
    Return a list with an INVALID_OPTION report when name_list contains a name
    which is not in allowed_name_list; return an empty list otherwise.

    list allowed_name_list -- names which are valid
    list name_list -- names to be validated
    string option_type -- describes the option type for reporting purposes
    string code_to_allow_extra_names -- code for forcing invalid names; when
        None, INVALID_OPTION is a non-forceable error, otherwise it is a
        forceable error or a warning
    bool allow_extra_names -- complements code_to_allow_extra_names and
        determines whether INVALID_OPTION is a forceable error or a warning
    """
    unknown_names = set(name_list) - set(allowed_name_list)
    if not unknown_names:
        return []
    make_report = reports.get_problem_creator(
        code_to_allow_extra_names, allow_extra_names
    )
    return [
        make_report(
            reports.invalid_option,
            sorted(unknown_names),
            sorted(allowed_name_list),
            option_type,
        )
    ]
def node_clear(env, node_name, allow_clear_cluster_node=False):
    """
    Remove specified node from various cluster caches.

    LibraryEnvironment env provides all for communication with externals
    string node_name
    bool allow_clear_cluster_node -- flag allows to clear node even if it's
        still in a cluster
    """
    # this command only works with the live cluster configuration
    mocked_envs = []
    if not env.is_cib_live:
        mocked_envs.append("CIB")
    if not env.is_corosync_conf_live:
        mocked_envs.append("COROSYNC_CONF")
    if mocked_envs:
        raise LibraryError(reports.live_environment_required(mocked_envs))

    current_nodes = get_nodes(env.get_corosync_conf(), env.get_cib())
    node_still_in_cluster = (
        node_addresses_contain_name(current_nodes, node_name)
        or
        node_addresses_contain_host(current_nodes, node_name)
    )
    if node_still_in_cluster:
        env.report_processor.process(
            reports.get_problem_creator(
                report_codes.FORCE_CLEAR_CLUSTER_NODE,
                allow_clear_cluster_node
            )(
                reports.node_to_clear_is_still_in_cluster,
                node_name
            )
        )

    remove_node(env.cmd_runner(), node_name)
def names_in(
    allowed_name_list, name_list, option_type="option",
    code_to_allow_extra_names=None, extra_names_allowed=False,
    allowed_option_patterns=None, banned_name_list=None,
):
    """
    Return a list with INVALID_OPTIONS reports when name_list contains a name
    which is not in allowed_name_list.

    list allowed_name_list -- names which are valid
    list name_list -- names to be validated
    string option_type -- describes the option type for reporting purposes
    string code_to_allow_extra_names -- code for forcing invalid names; when
        None, INVALID_OPTIONS is a non-forceable error, otherwise it is a
        forceable error or a warning
    bool extra_names_allowed -- complements code_to_allow_extra_names and
        determines whether INVALID_OPTIONS is a forceable error or a warning
    mixed allowed_option_patterns -- option patterns to be added to a report
    list banned_name_list -- list of options which cannot be forced
    """
    present_names = set(name_list)
    banned_set = set(banned_name_list or [])
    banned_names = set()
    # banned names are split out only when invalid names are forceable at all
    if not (code_to_allow_extra_names is None and not extra_names_allowed):
        banned_names = present_names & banned_set
    unknown_names = present_names - set(allowed_name_list) - banned_names

    report_list = []
    create_report = reports.get_problem_creator(
        code_to_allow_extra_names, extra_names_allowed
    )
    if unknown_names:
        report_list.append(
            create_report(
                reports.invalid_options,
                sorted(unknown_names),
                sorted(allowed_name_list),
                option_type,
                allowed_option_patterns=sorted(allowed_option_patterns or []),
            )
        )
    if banned_names:
        # banned options are a plain error, never forceable
        report_list.append(
            reports.invalid_options(
                sorted(banned_names),
                sorted(allowed_name_list),
                option_type,
            )
        )
    return report_list
def config_setup(
    env: LibraryEnvironment,
    site_list,
    arbitrator_list,
    instance_name=None,
    overwrite_existing=False,
):
    """
    Create booth configuration.

    env
    list site_list -- site addresses of multisite
    list arbitrator_list -- arbitrator addresses of multisite
    string instance_name -- booth instance name
    bool overwrite_existing -- allow overwriting existing files
    """
    instance_name = instance_name or constants.DEFAULT_INSTANCE_NAME
    report_processor = env.report_processor

    report_processor.report_list(
        config_validators.check_instance_name(instance_name)
    )
    report_processor.report_list(
        config_validators.create(site_list, arbitrator_list)
    )
    if report_processor.has_errors:
        raise LibraryError()

    booth_env = env.get_booth_env(instance_name)
    booth_conf = booth_env.create_facade(site_list, arbitrator_list)
    booth_conf.set_authfile(booth_env.key_path)

    overwrite_report = reports.get_problem_creator(
        force_code=report_codes.FORCE_FILE_OVERWRITE,
        is_forced=overwrite_existing,
    )
    try:
        booth_env.key.write_raw(
            tools.generate_binary_key(
                random_bytes_count=settings.booth_authkey_bytes
            ),
            can_overwrite=overwrite_existing,
        )
        booth_env.config.write_facade(
            booth_conf, can_overwrite=overwrite_existing
        )
    except FileAlreadyExists as e:
        report_processor.report(
            overwrite_report(
                reports.file_already_exists,
                e.metadata.file_type_code,
                e.metadata.path,
            )
        )
    except RawFileError as e:
        report_processor.report(raw_file_error_report(e))

    if report_processor.has_errors:
        raise LibraryError()
def exception_to_report_list(
    exception, file_type_code, file_path, force_code, is_forced_or_warning
):
    """
    Translate a booth config parser exception into a report list.

    Re-raise the exception when it cannot be translated.
    """
    del file_type_code  # this is defined by the report code
    report_creator = reports.get_problem_creator(
        force_code=force_code, is_forced=is_forced_or_warning
    )
    if isinstance(exception, InvalidLines):
        return [
            report_creator(
                booth_reports.booth_config_unexpected_lines,
                exception.args[0],
                file_path=file_path,
            )
        ]
    raise exception
def names_in(
    allowed_name_list, name_list, option_type="option",
    code_to_allow_extra_names=None, extra_names_allowed=False,
    allowed_option_patterns=None, banned_name_list=None,
):
    """
    Return a list with INVALID_OPTIONS reports when name_list contains a name
    which is not in allowed_name_list.

    list allowed_name_list -- names which are valid
    list name_list -- names to be validated
    string option_type -- describes the option type for reporting purposes
    string code_to_allow_extra_names -- code for forcing invalid names; when
        None, INVALID_OPTIONS is a non-forceable error, otherwise it is a
        forceable error or a warning
    bool extra_names_allowed -- complements code_to_allow_extra_names and
        determines whether INVALID_OPTIONS is a forceable error or a warning
    mixed allowed_option_patterns -- option patterns to be added to a report
    list banned_name_list -- list of options which cannot be forced
    """
    name_set = set(name_list)
    banned_candidates = set(banned_name_list or [])
    banned_names = set()
    # only treat banned names specially when forcing is possible at all
    if not (code_to_allow_extra_names is None and not extra_names_allowed):
        banned_names = name_set & banned_candidates
    invalid_names = name_set - set(allowed_name_list) - banned_names

    report_list = []
    create_report = reports.get_problem_creator(
        code_to_allow_extra_names, extra_names_allowed
    )
    if invalid_names:
        report_list.append(
            create_report(
                reports.invalid_options,
                sorted(invalid_names),
                sorted(allowed_name_list),
                option_type,
                allowed_option_patterns=sorted(allowed_option_patterns or []),
            )
        )
    if banned_names:
        # banned options are a plain error, never forceable
        report_list.append(
            reports.invalid_options(
                sorted(banned_names),
                sorted(allowed_name_list),
                option_type,
            )
        )
    return report_list
def _validate_value(self, value):
    """
    Check a single value; return a list with one (possibly forceable)
    INVALID_OPTION_VALUE report when the normalized value is not valid,
    an empty list otherwise.
    """
    if self._is_valid(value.normalized):
        return []
    make_report = reports.get_problem_creator(
        self._code_for_warning, self._produce_warning
    )
    return [
        make_report(
            reports.invalid_option_value,
            self._get_option_name_for_report(),
            value.original,
            self._get_allowed_values(),
            cannot_be_empty=self._value_cannot_be_empty,
            forbidden_characters=self._forbidden_characters,
        )
    ]
def _validate_remote_connection(resource_agent, existing_nodes_addrs, resource_id, instance_attributes, allow_not_suitable_command):
    """
    Validate creating a remote-node connection resource; return a report list.

    Reports (forceably) that a dedicated node add-remote command should be
    used, plus any host conflicts with existing nodes.
    """
    if resource_agent.get_name() != resource.remote_node.AGENT_NAME.full_name:
        return []

    report_list = [
        reports.get_problem_creator(
            report_codes.FORCE_NOT_SUITABLE_COMMAND,
            allow_not_suitable_command
        )(reports.use_command_node_add_remote)
    ]
    report_list.extend(
        resource.remote_node.validate_host_not_conflicts(
            existing_nodes_addrs, resource_id, instance_attributes
        )
    )
    return report_list
def raw_file_error_report(error, force_code=None, is_forced_or_warning=False):
    """
    Translate a RawFileError instance to a report

    RawFileError error -- an exception to be translated
    string force_code -- is it a forcible error? by which code?
    bool is_forced_or_warning -- translate to a warning if True, error
        otherwise
    """
    make_report = reports.get_problem_creator(force_code, is_forced_or_warning)
    # do not report real file path if we were working with a ghost file
    path_for_report = (
        "" if isinstance(error, GhostFileError) else error.metadata.path
    )
    return make_report(
        reports.file_io_error,
        error.metadata.file_type_code,
        error.action,
        error.reason,
        file_path=path_for_report,
    )
def validate(option_dict):
    # NOTE(review): closure — option_name, predicate, value_type_or_enum,
    # code_to_allow_extra_values, allow_extra_values and
    # option_name_for_report come from the enclosing scope
    value = ValuePair.get(option_dict[option_name])
    if predicate(value.normalized):
        return []
    create_report = reports.get_problem_creator(
        code_to_allow_extra_values, allow_extra_values
    )
    reported_name = (
        option_name_for_report
        if option_name_for_report is not None
        else option_name
    )
    return [
        create_report(
            reports.invalid_option_value,
            reported_name,
            value.original,
            value_type_or_enum,
        )
    ]
def validate(option_dict):
    # NOTE(review): closure — option_name, predicate, value_type_or_enum,
    # code_to_allow_extra_values, extra_values_allowed and
    # option_name_for_report come from the enclosing scope
    value = ValuePair.get(option_dict[option_name])
    if predicate(value.normalized):
        return []
    create_report = reports.get_problem_creator(
        code_to_allow_extra_values, extra_values_allowed
    )
    reported_name = (
        option_name_for_report
        if option_name_for_report is not None
        else option_name
    )
    return [
        create_report(
            reports.invalid_option_value,
            reported_name,
            value.original,
            value_type_or_enum,
        )
    ]
def validate_unique_instance_attributes(
    resource_agent, instance_attributes, resources_section,
    resource_id=None, force=False
):
    """
    Report resources of the same agent whose "unique" instance attributes
    have the same values as the attributes being validated.

    resource_agent -- agent of the resource being checked
    dict instance_attributes -- instance attributes to be checked
    resources_section -- cib element containing all resources
    string resource_id -- id of the checked resource, excluded from conflicts
    bool force -- if True, report conflicts as warnings instead of errors
    """
    make_report = reports.get_problem_creator(
        report_codes.FORCE_OPTIONS, force
    )
    unique_attr_names = [
        param["name"]
        for param in resource_agent.get_parameters()
        if param["unique"]
    ]
    same_agent_resources = find_primitives_by_agent(
        resources_section, resource_agent
    )
    report_list = []
    for attr_name in unique_attr_names:
        if attr_name not in instance_attributes:
            continue
        conflicting_resources = {
            primitive.get("id")
            for primitive in same_agent_resources
            if (
                primitive.get("id") != resource_id
                and
                instance_attributes[attr_name] == get_value(
                    "instance_attributes", primitive, attr_name
                )
            )
        }
        if conflicting_resources:
            report_list.append(
                make_report(
                    reports.resource_instance_attr_value_not_unique,
                    attr_name,
                    instance_attributes[attr_name],
                    resource_agent.get_name(),
                    conflicting_resources,
                )
            )
    return report_list
def validate(option_dict):
    # NOTE(review): closure — option_name, allowed_values,
    # code_to_allow_extra_values, allow_extra_values and
    # option_name_for_report come from the enclosing scope
    if option_name not in option_dict:
        return []

    value = option_dict[option_name]
    if not isinstance(value, ValuePair):
        value = ValuePair(value, value)

    if value.normalized in allowed_values:
        return []
    create_report = reports.get_problem_creator(
        code_to_allow_extra_values, allow_extra_values
    )
    reported_name = (
        option_name_for_report
        if option_name_for_report is not None
        else option_name
    )
    return [
        create_report(
            reports.invalid_option_value,
            reported_name,
            value.original,
            allowed_values,
        )
    ]
def _validate_remote_connection(
    resource_agent, existing_nodes_addrs, resource_id, instance_attributes,
    allow_not_suitable_command
):
    """
    Validate creating a remote-node connection resource; return a report list.

    Reports (forceably) that a dedicated node add-remote command should be
    used, plus any host conflicts with existing nodes.
    """
    if resource_agent.get_name() != resource.remote_node.AGENT_NAME.full_name:
        return []

    report_list = [
        reports.get_problem_creator(
            report_codes.FORCE_NOT_SUITABLE_COMMAND,
            allow_not_suitable_command
        )(reports.use_command_node_add_remote)
    ]
    report_list.extend(
        resource.remote_node.validate_host_not_conflicts(
            existing_nodes_addrs, resource_id, instance_attributes
        )
    )
    return report_list
def _find_resources_to_remove(
    cib, report_processor, node_type, node_identifier,
    allow_remove_multiple_nodes, find_resources
):
    """
    Find resources matching node_identifier using the find_resources callback.

    Raise LibraryError when no resource matches. When more than one matches,
    report it (forceable via allow_remove_multiple_nodes).
    """
    matching_elements = find_resources(get_resources(cib), node_identifier)
    if not matching_elements:
        raise LibraryError(reports.node_not_found(node_identifier, node_type))

    if len(matching_elements) > 1:
        report_processor.process(
            reports.get_problem_creator(
                report_codes.FORCE_REMOVE_MULTIPLE_NODES,
                allow_remove_multiple_nodes
            )(
                reports.multiple_result_found,
                "resource",
                [element.attrib["id"] for element in matching_elements],
                node_identifier
            )
        )
    return matching_elements
def _validate_guest_change(
    tree, existing_nodes_names, existing_nodes_addrs, meta_attributes,
    allow_not_suitable_command, detect_remove=False
):
    """
    Validate a change of guest node meta attributes; return a report list.

    Reports (forceably) that a dedicated node add/remove-guest command should
    be used instead, plus any conflicts of the guest node with existing nodes.
    """
    if not resource.guest_node.is_node_name_in_options(meta_attributes):
        return []

    node_name = resource.guest_node.get_node_name_from_options(meta_attributes)

    # decide which command the user should have used instead
    suggested_command = reports.use_command_node_add_guest
    if (
        detect_remove
        and
        not resource.guest_node.get_guest_option_value(meta_attributes)
    ):
        suggested_command = reports.use_command_node_remove_guest

    report_list = [
        reports.get_problem_creator(
            report_codes.FORCE_NOT_SUITABLE_COMMAND,
            allow_not_suitable_command
        )(suggested_command)
    ]
    report_list.extend(
        resource.guest_node.validate_conflicts(
            tree, existing_nodes_names, existing_nodes_addrs, node_name,
            meta_attributes
        )
    )
    return report_list
def remove_nodes(env, node_list, force_quorum_loss=False, skip_offline=False):
    """
    Remove nodes from a cluster.

    env LibraryEnvironment
    node_list iterable -- names of nodes to remove
    force_quorum_loss bool -- treat quorum loss as a warning if True
    skip_offline bool -- treat unreachable nodes as warnings if True
    """
    _ensure_live_env(env)  # raises if env is not live
    report_processor = SimpleReportProcessor(env.report_processor)
    target_factory = env.get_node_target_factory()
    corosync_conf = env.get_corosync_conf()
    cluster_nodes_names = corosync_conf.get_nodes_names()

    # validations
    report_processor.report_list(
        config_validators.remove_nodes(
            node_list,
            corosync_conf.get_nodes(),
            corosync_conf.get_quorum_device_settings(),
        )
    )
    if report_processor.has_errors:
        # If there is an error, there is usually not much sense in doing other
        # validations:
        # - if there would be no node left in the cluster, it's pointless
        # to check for quorum loss or if at least one remaining node is online
        # - if only one node is being removed and it doesn't exist, it's again
        # pointless to check for other issues
        raise LibraryError()

    target_report_list, cluster_nodes_target_list = (
        target_factory.get_target_list_with_reports(
            cluster_nodes_names,
            skip_non_existing=skip_offline,
        )
    )
    known_nodes = set(target.label for target in cluster_nodes_target_list)
    unknown_nodes = set(
        name for name in cluster_nodes_names if name not in known_nodes
    )
    report_processor.report_list(target_report_list)

    com_cmd = GetOnlineTargets(
        report_processor,
        ignore_offline_targets=skip_offline,
    )
    com_cmd.set_targets(cluster_nodes_target_list)
    online_target_list = run_com(env.get_node_communicator(), com_cmd)
    offline_target_list = [
        target for target in cluster_nodes_target_list
        if target not in online_target_list
    ]
    staying_online_target_list = [
        target for target in online_target_list
        if target.label not in node_list
    ]
    targets_to_remove = [
        target for target in cluster_nodes_target_list
        if target.label in node_list
    ]
    if not staying_online_target_list:
        report_processor.report(
            reports.unable_to_connect_to_any_remaining_node()
        )
        # If no remaining node is online, there is no point in checking quorum
        # loss or anything as we would just get errors.
        raise LibraryError()

    if skip_offline:
        staying_offline_nodes = (
            [
                target.label for target in offline_target_list
                if target.label not in node_list
            ]
            +
            [name for name in unknown_nodes if name not in node_list]
        )
        if staying_offline_nodes:
            report_processor.report(
                reports.unable_to_connect_to_all_remaining_node(
                    staying_offline_nodes
                )
            )

    atb_has_to_be_enabled = sbd.atb_has_to_be_enabled(
        env.cmd_runner(), corosync_conf, -len(node_list)
    )
    if atb_has_to_be_enabled:
        report_processor.report(
            reports.corosync_quorum_atb_will_be_enabled_due_to_sbd()
        )
        com_cmd = CheckCorosyncOffline(
            report_processor,
            allow_skip_offline=False,
        )
        com_cmd.set_targets(staying_online_target_list)
        run_com(env.get_node_communicator(), com_cmd)
    else:
        # Check if removing the nodes would cause quorum loss. We ask the
        # nodes to be removed for their view of quorum. If they are all
        # stopped or not in a quorate partition, their removal cannot cause
        # quorum loss. That's why we ask them and not the remaining nodes.
        # example: 5-node cluster, 3 online nodes, removing one online node,
        # results in 4-node cluster with 2 online nodes => quorum lost
        # Check quorum loss only if ATB does not need to be enabled. If it is
        # required, cluster has to be turned off and therefore it loses
        # quorum.
        forceable_report_creator = reports.get_problem_creator(
            report_codes.FORCE_QUORUM_LOSS, force_quorum_loss
        )
        com_cmd = cluster.GetQuorumStatus(report_processor)
        com_cmd.set_targets(targets_to_remove)
        failures, quorum_status = run_com(env.get_node_communicator(), com_cmd)
        if quorum_status:
            if quorum_status.stopping_nodes_cause_quorum_loss(node_list):
                report_processor.report(
                    forceable_report_creator(
                        reports.corosync_quorum_will_be_lost
                    )
                )
        elif failures or not targets_to_remove:
            report_processor.report(
                forceable_report_creator(
                    reports.corosync_quorum_loss_unable_to_check,
                )
            )

    if report_processor.has_errors:
        raise LibraryError()

    # validations done

    unknown_to_remove = [name for name in unknown_nodes if name in node_list]
    if unknown_to_remove:
        report_processor.report(
            reports.nodes_to_remove_unreachable(unknown_to_remove)
        )
    if targets_to_remove:
        com_cmd = cluster.DestroyWarnOnFailure(report_processor)
        com_cmd.set_targets(targets_to_remove)
        run_and_raise(env.get_node_communicator(), com_cmd)

    corosync_conf.remove_nodes(node_list)
    if atb_has_to_be_enabled:
        corosync_conf.set_quorum_options(dict(auto_tie_breaker="1"))

    com_cmd = DistributeCorosyncConf(
        env.report_processor,
        corosync_conf.config.export(),
        allow_skip_offline=False,
    )
    com_cmd.set_targets(staying_online_target_list)
    run_and_raise(env.get_node_communicator(), com_cmd)

    com_cmd = ReloadCorosyncConf(env.report_processor)
    com_cmd.set_targets(staying_online_target_list)
    run_and_raise(env.get_node_communicator(), com_cmd)

    # try to remove nodes from pcmk using crm_node -R <node> --force and if
    # not successful remove it directly from CIB file on all nodes in parallel
    com_cmd = RemoveNodesFromCib(env.report_processor, node_list)
    com_cmd.set_targets(staying_online_target_list)
    run_and_raise(env.get_node_communicator(), com_cmd)
def create_into_bundle(
    env, resource_id, resource_agent_name,
    operation_list, meta_attributes, instance_attributes,
    bundle_id,
    allow_absent_agent=False,
    allow_invalid_operation=False,
    allow_invalid_instance_attributes=False,
    use_default_operations=True,
    ensure_disabled=False,
    wait=False,
    allow_not_suitable_command=False,
    allow_not_accessible_resource=False,
):
    # pylint: disable=too-many-arguments, too-many-locals
    """
    Create a new resource in a cib and put it into an existing bundle

    LibraryEnvironment env -- provides all for communication with externals
    string resource_id -- identifier of the resource
    string resource_agent_name -- name for the identification of the agent
    list of dict operation_list -- attributes for each entered operation
    dict meta_attributes -- attributes for primitive/meta_attributes
    dict instance_attributes -- attributes for primitive/instance_attributes
    string bundle_id -- id of an existing bundle to put the created resource in
    bool allow_absent_agent -- allow an agent not installed in the system
    bool allow_invalid_operation -- allow operations not listed in the
        resource agent metadata
    bool allow_invalid_instance_attributes -- allow instance attributes not
        listed in the resource agent metadata and allow omitting required ones
    bool use_default_operations -- if False, do not add default cib operations
        (specified in the resource agent)
    bool ensure_disabled -- keep the resource in target-role "Stopped"
    mixed wait -- flag for controlling waiting for pacemaker idle mechanism
    bool allow_not_suitable_command -- flag for FORCE_NOT_SUITABLE_COMMAND
    bool allow_not_accessible_resource -- flag for
        FORCE_RESOURCE_IN_BUNDLE_NOT_ACCESSIBLE
    """
    resource_agent = get_agent(
        env.report_processor,
        env.cmd_runner(),
        resource_agent_name,
        allow_absent_agent,
    )
    with resource_environment(
        env,
        wait,
        [resource_id],
        _ensure_disabled_after_wait(
            ensure_disabled
            or
            resource.common.are_meta_disabled(meta_attributes)
        ),
        required_cib_version=Version(2, 8, 0),
    ) as resources_section:
        id_provider = IdProvider(resources_section)
        _check_special_cases(
            env,
            resource_agent,
            resources_section,
            resource_id,
            meta_attributes,
            instance_attributes,
            allow_not_suitable_command,
        )
        primitive_element = resource.primitive.create(
            env.report_processor, resources_section, id_provider,
            resource_id, resource_agent,
            operation_list, meta_attributes, instance_attributes,
            allow_invalid_operation,
            allow_invalid_instance_attributes,
            use_default_operations,
        )
        if ensure_disabled:
            resource.common.disable(primitive_element, id_provider)

        bundle_el = _find_bundle(resources_section, bundle_id)
        if not resource.bundle.is_pcmk_remote_accessible(bundle_el):
            env.report_processor.process(
                reports.get_problem_creator(
                    report_codes.FORCE_RESOURCE_IN_BUNDLE_NOT_ACCESSIBLE,
                    allow_not_accessible_resource,
                )(
                    reports.resource_in_bundle_not_accessible,
                    bundle_id,
                    resource_id,
                )
            )
        resource.bundle.add_resource(bundle_el, primitive_element)
def create_into_bundle(
    env, resource_id, resource_agent_name,
    operation_list, meta_attributes, instance_attributes,
    bundle_id,
    allow_absent_agent=False,
    allow_invalid_operation=False,
    allow_invalid_instance_attributes=False,
    use_default_operations=True,
    ensure_disabled=False,
    wait=False,
    allow_not_suitable_command=False,
    allow_not_accessible_resource=False,
):
    # pylint: disable=too-many-arguments, too-many-locals
    """
    Create a new resource in a cib and put it into an existing bundle

    LibraryEnvironment env -- provides all for communication with externals
    string resource_id -- identifier of the resource
    string resource_agent_name -- name for the identification of the agent
    list of dict operation_list -- attributes for each entered operation
    dict meta_attributes -- attributes for primitive/meta_attributes
    dict instance_attributes -- attributes for primitive/instance_attributes
    string bundle_id -- id of an existing bundle to put the created resource in
    bool allow_absent_agent -- allow an agent not installed in the system
    bool allow_invalid_operation -- allow operations not listed in the
        resource agent metadata
    bool allow_invalid_instance_attributes -- allow instance attributes not
        listed in the resource agent metadata and allow omitting required ones
    bool use_default_operations -- if False, do not add default cib operations
        (specified in the resource agent)
    bool ensure_disabled -- keep the resource in target-role "Stopped"
    mixed wait -- flag for controlling waiting for pacemaker idle mechanism
    bool allow_not_suitable_command -- flag for FORCE_NOT_SUITABLE_COMMAND
    bool allow_not_accessible_resource -- flag for
        FORCE_RESOURCE_IN_BUNDLE_NOT_ACCESSIBLE
    """
    resource_agent = get_agent(
        env.report_processor,
        env.cmd_runner(),
        resource_agent_name,
        allow_absent_agent,
    )
    with resource_environment(
        env,
        wait,
        [resource_id],
        _ensure_disabled_after_wait(
            ensure_disabled
            or
            resource.common.are_meta_disabled(meta_attributes)
        ),
        required_cib_version=Version(2, 8, 0),
    ) as resources_section:
        id_provider = IdProvider(resources_section)
        _check_special_cases(
            env, resource_agent, resources_section, resource_id,
            meta_attributes, instance_attributes, allow_not_suitable_command
        )
        primitive_element = resource.primitive.create(
            env.report_processor, resources_section, id_provider,
            resource_id, resource_agent,
            operation_list, meta_attributes, instance_attributes,
            allow_invalid_operation,
            allow_invalid_instance_attributes,
            use_default_operations,
        )
        if ensure_disabled:
            resource.common.disable(primitive_element, id_provider)

        bundle_el = _find_bundle(resources_section, bundle_id)
        if not resource.bundle.is_pcmk_remote_accessible(bundle_el):
            env.report_processor.process(
                reports.get_problem_creator(
                    report_codes.FORCE_RESOURCE_IN_BUNDLE_NOT_ACCESSIBLE,
                    allow_not_accessible_resource
                )(
                    reports.resource_in_bundle_not_accessible,
                    bundle_id,
                    resource_id
                )
            )
        resource.bundle.add_resource(bundle_el, primitive_element)
def add_nodes(
    env, nodes, wait=False, start=False, enable=False, force=False,
    force_unresolvable=False, skip_offline_nodes=False,
    no_watchdog_validation=False,
):
    # pylint: disable=too-many-locals
    """
    Add specified nodes to the local cluster

    Raise LibraryError on any error.

    env LibraryEnvironment
    nodes list -- list of dicts which represents node.
        Supported keys are: name (required), addrs (list), devices (list),
        watchdog
    wait -- specifies if command should try to wait for cluster to start up.
        Has no effect start is False. If set to False command will not wait for
        cluster to start. If None command will wait for some default timeout.
        If int wait set timeout to int value of seconds.
    start bool -- if True start cluster when it is set up
    enable bool -- if True enable cluster when it is set up
    force bool -- if True some validations errors are treated as warnings
    force_unresolvable bool -- if True not resolvable addresses of nodes are
        treated as warnings
    skip_offline_nodes bool -- if True non fatal connection failures to other
        hosts are treated as warnings
    no_watchdog_validation bool -- if True do not validate specified watchdogs
        on remote hosts
    """
    _ensure_live_env(env)  # raises if env is not live

    report_processor = SimpleReportProcessor(env.report_processor)
    target_factory = env.get_node_target_factory()
    is_sbd_enabled = sbd.is_sbd_enabled(env.cmd_runner())
    corosync_conf = env.get_corosync_conf()
    cluster_nodes_names = corosync_conf.get_nodes_names()
    corosync_node_options = {"name", "addrs"}
    sbd_node_options = {"devices", "watchdog"}

    # normalize list-valued node options; SBD options only matter when SBD is
    # enabled in the local cluster
    keys_to_normalize = {"addrs"}
    if is_sbd_enabled:
        keys_to_normalize |= sbd_node_options
    new_nodes = [_normalize_dict(node, keys_to_normalize) for node in nodes]

    # get targets for existing nodes
    target_report_list, cluster_nodes_target_list = (
        target_factory.get_target_list_with_reports(
            cluster_nodes_names,
            skip_non_existing=skip_offline_nodes,
        )
    )
    report_processor.report_list(target_report_list)
    # get a target for qnetd if needed
    qdevice_model, qdevice_model_options, _, _ = (
        corosync_conf.get_quorum_device_settings()
    )
    if qdevice_model == "net":
        try:
            qnetd_target = target_factory.get_target(
                qdevice_model_options["host"]
            )
        except HostNotFound:
            # NOTE(review): qnetd_target stays unbound on this path; the
            # qdevice setup below reads it, so this relies on host_not_found
            # being an error that makes the has_errors check raise first —
            # confirm
            report_processor.report(
                reports.host_not_found([qdevice_model_options["host"]])
            )

    # Get targets for new nodes and report unknown (== not-authorized) nodes.
    # If a node doesn't contain the 'name' key, validation of inputs reports
    # it. That means we don't report missing names but cannot rely on them
    # being present either.
    target_report_list, new_nodes_target_list = (
        target_factory.get_target_list_with_reports(
            [node["name"] for node in new_nodes if "name" in node],
            allow_skip=False,
        )
    )
    report_processor.report_list(target_report_list)

    # Set default values for not-specified node options.
    # Use an address defined in known-hosts for each node with no addresses
    # specified. This allows users not to specify node addresses at all which
    # simplifies the whole node add command / form significantly.
    new_nodes_target_dict = {
        target.label: target for target in new_nodes_target_list
    }
    addrs_defaulter = _get_addrs_defaulter(
        report_processor, new_nodes_target_dict
    )
    new_nodes_defaulters = {"addrs": addrs_defaulter}
    if is_sbd_enabled:
        watchdog_defaulter = _get_watchdog_defaulter(
            report_processor, new_nodes_target_dict
        )
        new_nodes_defaulters["devices"] = lambda _: []
        new_nodes_defaulters["watchdog"] = watchdog_defaulter
    new_nodes = [
        _set_defaults_in_dict(node, new_nodes_defaulters)
        for node in new_nodes
    ]
    new_nodes_dict = {
        node["name"]: node for node in new_nodes if "name" in node
    }

    # Validate inputs - node options names
    # We do not want to make corosync validators know about SBD options and
    # vice versa. Therefore corosync and SBD validators get only valid
    # corosync and SBD options respectively, and we need to check for any
    # surplus options here.
    report_processor.report_list(
        validate_names_in(
            corosync_node_options | sbd_node_options,
            set([
                option
                for node_options in [node.keys() for node in new_nodes]
                for option in node_options
            ]),
            option_type="node"
        )
    )

    # Validate inputs - corosync part
    try:
        cib = env.get_cib()
        cib_nodes = get_remote_nodes(cib) + get_guest_nodes(cib)
    except LibraryError:
        # CIB not loadable: validate against an empty node list, forceable via
        # FORCE_LOAD_NODES_FROM_CIB
        cib_nodes = []
        report_processor.report(
            reports.get_problem_creator(
                report_codes.FORCE_LOAD_NODES_FROM_CIB, force
            )(reports.cib_load_error_get_nodes_for_validation)
        )
    # corosync validator rejects non-corosync keys
    new_nodes_corosync = [
        {key: node[key] for key in corosync_node_options if key in node}
        for node in new_nodes
    ]
    report_processor.report_list(
        config_validators.add_nodes(
            new_nodes_corosync,
            corosync_conf.get_nodes(),
            cib_nodes,
            force_unresolvable=force_unresolvable
        )
    )

    # Validate inputs - SBD part
    if is_sbd_enabled:
        report_processor.report_list(
            sbd.validate_new_nodes_devices({
                node["name"]: node["devices"]
                for node in new_nodes if "name" in node
            })
        )
    else:
        # SBD is not used, so SBD options on new nodes are an error
        for node in new_nodes:
            sbd_options = sbd_node_options.intersection(node.keys())
            if sbd_options and "name" in node:
                report_processor.report(
                    reports.sbd_not_used_cannot_set_sbd_options(
                        sbd_options, node["name"]
                    )
                )

    # Validate inputs - flags part
    wait_timeout = _get_validated_wait_timeout(report_processor, wait, start)

    # Get online cluster nodes
    # This is the only call in which we accept skip_offline_nodes option for
    # the cluster nodes. In all the other actions we communicate only with the
    # online nodes. This allows us to simplify code as any communication issue
    # is considered an error, ends the command processing and is not possible
    # to skip it by skip_offline_nodes. We do not have to care about a
    # situation when a communication command cannot connect to some nodes and
    # then the next command can connect but fails due to the previous one did
    # not succeed.
    online_cluster_target_list = []
    if cluster_nodes_target_list:
        com_cmd = GetOnlineTargets(
            report_processor,
            ignore_offline_targets=skip_offline_nodes,
        )
        com_cmd.set_targets(cluster_nodes_target_list)
        online_cluster_target_list = run_com(
            env.get_node_communicator(), com_cmd
        )
        offline_cluster_target_list = [
            target for target in cluster_nodes_target_list
            if target not in online_cluster_target_list
        ]
        if len(online_cluster_target_list) == 0:
            report_processor.report(
                reports.unable_to_perform_operation_on_any_node()
            )
        elif offline_cluster_target_list and skip_offline_nodes:
            # TODO: report (warn) how to fix offline nodes when they come
            # online
            # report_processor.report(None)
            pass

    # Validate existing cluster nodes status
    atb_has_to_be_enabled = sbd.atb_has_to_be_enabled(
        env.cmd_runner(), corosync_conf, len(new_nodes)
    )
    if atb_has_to_be_enabled:
        report_processor.report(
            reports.corosync_quorum_atb_will_be_enabled_due_to_sbd()
        )
        if online_cluster_target_list:
            com_cmd = CheckCorosyncOffline(
                report_processor,
                allow_skip_offline=False,
            )
            com_cmd.set_targets(online_cluster_target_list)
            run_com(env.get_node_communicator(), com_cmd)

    # Validate new nodes. All new nodes have to be online.
    com_cmd = GetHostInfo(report_processor)
    com_cmd.set_targets(new_nodes_target_list)
    report_processor.report_list(
        _host_check_cluster_setup(
            run_com(env.get_node_communicator(), com_cmd),
            force,
            # version of services may not be the same across the existing
            # cluster nodes, so it's not easy to make this check properly
            check_services_versions=False,
        )
    )

    # Validate SBD on new nodes
    if is_sbd_enabled:
        if no_watchdog_validation:
            report_processor.report(
                reports.sbd_watchdog_validation_inactive()
            )
        com_cmd = CheckSbd(report_processor)
        for new_node_target in new_nodes_target_list:
            new_node = new_nodes_dict[new_node_target.label]
            # Do not send watchdog if validation is turned off. Listing of
            # available watchdogs in pcsd may restart the machine in some
            # corner cases.
            com_cmd.add_request(
                new_node_target,
                watchdog=(
                    "" if no_watchdog_validation else new_node["watchdog"]
                ),
                device_list=new_node["devices"],
            )
        run_com(env.get_node_communicator(), com_cmd)

    if report_processor.has_errors:
        raise LibraryError()

    # Validation done. If errors occurred, an exception has been raised and we
    # don't get below this line.

    # First set up everything else than corosync. Once the new nodes are
    # present in corosync.conf, they're considered part of a cluster and the
    # node add command cannot be run again. So we need to minimize the amount
    # of actions (and therefore possible failures) after adding the nodes to
    # corosync.

    # distribute auth tokens of all cluster nodes (including the new ones) to
    # all new nodes
    com_cmd = UpdateKnownHosts(
        env.report_processor,
        known_hosts_to_add=env.get_known_hosts(
            cluster_nodes_names + list(new_nodes_dict.keys())
        ),
        known_hosts_to_remove=[],
    )
    com_cmd.set_targets(new_nodes_target_list)
    run_and_raise(env.get_node_communicator(), com_cmd)

    # qdevice setup
    if qdevice_model == "net":
        qdevice_net.set_up_client_certificates(
            env.cmd_runner(),
            env.report_processor,
            env.communicator_factory,
            qnetd_target,
            corosync_conf.get_cluster_name(),
            new_nodes_target_list,
            # we don't want to allow skipping offline nodes which are being
            # added, otherwise qdevice will not work properly
            skip_offline_nodes=False,
            allow_skip_offline=False
        )

    # sbd setup
    if is_sbd_enabled:
        sbd_cfg = environment_file_to_dict(sbd.get_local_sbd_config())
        com_cmd = SetSbdConfig(env.report_processor)
        for new_node_target in new_nodes_target_list:
            new_node = new_nodes_dict[new_node_target.label]
            com_cmd.add_request(
                new_node_target,
                sbd.create_sbd_config(
                    sbd_cfg,
                    new_node["name"],
                    watchdog=new_node["watchdog"],
                    device_list=new_node["devices"],
                )
            )
        run_and_raise(env.get_node_communicator(), com_cmd)

        com_cmd = EnableSbdService(env.report_processor)
        com_cmd.set_targets(new_nodes_target_list)
        run_and_raise(env.get_node_communicator(), com_cmd)
    else:
        # make sure SBD is not enabled on nodes joining a non-SBD cluster
        com_cmd = DisableSbdService(env.report_processor)
        com_cmd.set_targets(new_nodes_target_list)
        run_and_raise(env.get_node_communicator(), com_cmd)

    # booth setup
    booth_sync.send_all_config_to_node(
        env.get_node_communicator(),
        env.report_processor,
        new_nodes_target_list,
        rewrite_existing=force,
        skip_wrong_config=force,
    )

    # distribute corosync and pacemaker authkeys
    files_action = {}
    forceable_io_error_creator = reports.get_problem_creator(
        report_codes.SKIP_FILE_DISTRIBUTION_ERRORS, force
    )
    if os.path.isfile(settings.corosync_authkey_file):
        try:
            files_action.update(
                node_communication_format.corosync_authkey_file(
                    open(settings.corosync_authkey_file, "rb").read()
                )
            )
        except EnvironmentError as e:
            report_processor.report(
                forceable_io_error_creator(
                    reports.file_io_error,
                    env_file_role_codes.COROSYNC_AUTHKEY,
                    file_path=settings.corosync_authkey_file,
                    operation="read",
                    reason=format_environment_error(e)
                )
            )

    if os.path.isfile(settings.pacemaker_authkey_file):
        try:
            files_action.update(
                node_communication_format.pcmk_authkey_file(
                    open(settings.pacemaker_authkey_file, "rb").read()
                )
            )
        except EnvironmentError as e:
            report_processor.report(
                forceable_io_error_creator(
                    reports.file_io_error,
                    env_file_role_codes.PACEMAKER_AUTHKEY,
                    file_path=settings.pacemaker_authkey_file,
                    operation="read",
                    reason=format_environment_error(e)
                )
            )

    # pcs_settings.conf was previously synced using pcsdcli
    # send_local_configs. This has been changed temporarily until new system
    # for distribution and synchronization of configs will be introduced.
    if os.path.isfile(settings.pcsd_settings_conf_location):
        try:
            files_action.update(
                node_communication_format.pcs_settings_conf_file(
                    open(settings.pcsd_settings_conf_location, "r").read()
                )
            )
        except EnvironmentError as e:
            report_processor.report(
                forceable_io_error_creator(
                    reports.file_io_error,
                    env_file_role_codes.PCS_SETTINGS_CONF,
                    file_path=settings.pcsd_settings_conf_location,
                    operation="read",
                    reason=format_environment_error(e)
                )
            )

    # stop here if one of the files could not be loaded and it was not forced
    if report_processor.has_errors:
        raise LibraryError()

    if files_action:
        com_cmd = DistributeFilesWithoutForces(
            env.report_processor, files_action
        )
        com_cmd.set_targets(new_nodes_target_list)
        run_and_raise(env.get_node_communicator(), com_cmd)

    # Distribute and reload pcsd SSL certificate
    report_processor.report(
        reports.pcsd_ssl_cert_and_key_distribution_started(
            [target.label for target in new_nodes_target_list]
        )
    )

    try:
        with open(settings.pcsd_cert_location, "r") as f:
            ssl_cert = f.read()
    except EnvironmentError as e:
        report_processor.report(
            reports.file_io_error(
                env_file_role_codes.PCSD_SSL_CERT,
                file_path=settings.pcsd_cert_location,
                reason=format_environment_error(e),
                operation="read",
            )
        )
    try:
        with open(settings.pcsd_key_location, "r") as f:
            ssl_key = f.read()
    except EnvironmentError as e:
        report_processor.report(
            reports.file_io_error(
                env_file_role_codes.PCSD_SSL_KEY,
                file_path=settings.pcsd_key_location,
                reason=format_environment_error(e),
                operation="read",
            )
        )
    if report_processor.has_errors:
        raise LibraryError()
    com_cmd = SendPcsdSslCertAndKey(env.report_processor, ssl_cert, ssl_key)
    com_cmd.set_targets(new_nodes_target_list)
    run_and_raise(env.get_node_communicator(), com_cmd)

    # When corosync >= 2 is in use, the procedure for adding a node is:
    # 1. add the new node to corosync.conf on all existing nodes
    # 2. reload corosync.conf before the new node is started
    # 3. start the new node
    # If done otherwise, membership gets broken and qdevice hangs. Cluster
    # will recover after a minute or so but still it's a wrong way.
    corosync_conf.add_nodes(new_nodes_corosync)
    if atb_has_to_be_enabled:
        corosync_conf.set_quorum_options(dict(auto_tie_breaker="1"))

    com_cmd = DistributeCorosyncConf(
        env.report_processor,
        corosync_conf.config.export(),
        allow_skip_offline=False,
    )
    com_cmd.set_targets(online_cluster_target_list + new_nodes_target_list)
    run_and_raise(env.get_node_communicator(), com_cmd)

    com_cmd = ReloadCorosyncConf(env.report_processor)
    com_cmd.set_targets(online_cluster_target_list)
    run_and_raise(env.get_node_communicator(), com_cmd)

    # Optionally enable and start cluster services.
    if enable:
        com_cmd = EnableCluster(env.report_processor)
        com_cmd.set_targets(new_nodes_target_list)
        run_and_raise(env.get_node_communicator(), com_cmd)
    if start:
        _start_cluster(
            env.communicator_factory,
            env.report_processor,
            new_nodes_target_list,
            wait_timeout=wait_timeout,
        )