def _service_stop(lib_env: LibraryEnvironment, func):
    """
    Stop the quorum device service by calling ``func``, reporting progress.

    lib_env -- provides the report processor and the command runner
    func -- callable taking a CommandRunner; performs the actual stop

    Raise LibraryError if the service fails to stop.
    """
    reporter = lib_env.report_processor
    reporter.report(
        ReportItem.info(
            reports.messages.ServiceActionStarted(
                reports.const.SERVICE_ACTION_STOP, "quorum device"
            )
        )
    )
    try:
        func(lib_env.cmd_runner())
    except external.StopServiceError as e:
        failure = reports.messages.ServiceActionFailed(
            reports.const.SERVICE_ACTION_STOP, e.service, e.message
        )
        raise LibraryError(ReportItem.error(failure)) from e
    reporter.report(
        ReportItem.info(
            reports.messages.ServiceActionSucceeded(
                reports.const.SERVICE_ACTION_STOP, "quorum device"
            )
        )
    )
def pull_config(env: LibraryEnvironment, node_name, instance_name=None):
    """
    Get config from the specified node and save it on the local system.
    Existing files are overwritten.

    env
    string node_name -- name of the node from which the config should be
        fetched
    string instance_name -- booth instance name
    """
    report_processor = env.report_processor
    booth_env = env.get_booth_env(instance_name)
    # resolve the default instance name from the booth environment
    instance_name = booth_env.instance_name
    _ensure_live_env(env, booth_env)
    conf_dir = os.path.dirname(booth_env.config_path)
    env.report_processor.report(
        ReportItem.info(
            reports.messages.BoothFetchingConfigFromNode(
                node_name,
                config=instance_name,
            )
        )
    )
    com_cmd = BoothGetConfig(env.report_processor, instance_name)
    com_cmd.set_targets(
        [env.get_node_target_factory().get_target_from_hostname(node_name)]
    )
    # pylint: disable=unsubscriptable-object
    # In general, pylint is right. And it cannot know in this case code is OK.
    # It is covered by tests.
    output = run_and_raise(env.get_node_communicator(), com_cmd)[0][1]
    try:
        # TODO adapt to new file transfer framework once it is written
        if (
            output["authfile"]["name"] is not None
            and output["authfile"]["data"]
        ):
            authfile_name = output["authfile"]["name"]
            # the authfile name doubles as an instance name -- validate it
            # before writing anything to disk
            report_list = config_validators.check_instance_name(
                authfile_name
            )
            if report_list:
                raise LibraryError(*report_list)
            booth_key = FileInstance.for_booth_key(authfile_name)
            booth_key.write_raw(
                base64.b64decode(
                    output["authfile"]["data"].encode("utf-8")
                ),
                can_overwrite=True,
            )
        booth_env.config.write_raw(
            output["config"]["data"].encode("utf-8"), can_overwrite=True
        )
        env.report_processor.report(
            ReportItem.info(
                reports.messages.BoothConfigAcceptedByNode(
                    name_list=[instance_name]
                )
            )
        )
    except RawFileError as e:
        # distinguish "config dir missing" from other file errors
        if not os.path.exists(conf_dir):
            report_processor.report(
                ReportItem.error(
                    reports.messages.BoothPathNotExists(conf_dir)
                )
            )
        else:
            report_processor.report(raw_file_error_report(e))
    except KeyError as e:
        # the remote node returned data in an unexpected shape
        raise LibraryError(
            ReportItem.error(
                reports.messages.InvalidResponseFormat(node_name)
            )
        ) from e
    if report_processor.has_errors:
        raise LibraryError()
def initialize_block_devices(
    report_processor: ReportProcessor, cmd_runner, device_list, option_dict
):
    """
    Initialize devices with specified options in option_dict.
    Raise LibraryError on failure.

    report_processor -- report processor
    cmd_runner -- CommandRunner
    device_list -- list of strings
    option_dict -- dictionary of options and their values
    """
    report_processor.report(
        ReportItem.info(
            reports.messages.SbdDeviceInitializationStarted(device_list)
        )
    )

    # build: sbd -d <dev> ... <mapped option> <value> ... create
    command = [settings.sbd_binary]
    for dev in device_list:
        command.extend(["-d", dev])
    # sort for a deterministic command line
    for name, value in sorted(option_dict.items()):
        command.extend(
            [DEVICE_INITIALIZATION_OPTIONS_MAPPING[name], str(value)]
        )
    command.append("create")

    dummy_std_out, std_err, return_code = cmd_runner.run(command)
    if return_code != 0:
        raise LibraryError(
            ReportItem.error(
                reports.messages.SbdDeviceInitializationError(
                    device_list, std_err
                )
            )
        )
    report_processor.report(
        ReportItem.info(
            reports.messages.SbdDeviceInitializationSuccess(device_list)
        )
    )
def info_resource_state(cluster_state, resource_id):
    """
    Return an info ReportItem describing where resource_id currently runs.

    cluster_state -- parsed cluster status
    resource_id -- id of the resource to look up
    """
    roles_with_nodes = get_resource_state(cluster_state, resource_id)
    if roles_with_nodes:
        return ReportItem.info(
            reports.messages.ResourceRunningOnNodes(
                resource_id, roles_with_nodes
            )
        )
    return ReportItem.info(reports.messages.ResourceDoesNotRun(resource_id))
def _process_response(self, response):
    """
    Process one node's response to a cluster-start request.

    Nodes that are offline or whose start is still pending are collected in
    self._not_yet_started_target_list so the caller can wait / retry.
    """
    report = response_to_report_item(response)
    target = response.request.target
    if report is None:
        try:
            parsed_response = json.loads(response.data)
            # If the node is offline, we only get the "offline" key. Asking
            # for any other in that case results in KeyError which is not
            # what we want.
            if parsed_response.get(
                "pending", True
            ) or not parsed_response.get("online", False):
                self._not_yet_started_target_list.append(target)
                return
            report = ReportItem.info(
                reports.messages.ClusterStartSuccess(target.label)
            )
        except (json.JSONDecodeError, KeyError):
            report = ReportItem.error(
                reports.messages.InvalidResponseFormat(target.label)
            )
    else:
        if not response.was_connected:
            # connection failure: remember the node and downgrade the
            # communication error to a warning
            self._not_yet_started_target_list.append(target)
            report = response_to_report_item(
                response, severity=ReportItemSeverity.WARNING
            )
    self._report(report)
def start_booth(env: LibraryEnvironment, instance_name=None):
    """
    Start specified instance of booth service, systemd systems supported
    only. On non-systemd systems it can be run like this:
    BOOTH_CONF_FILE=<booth-file-path> /etc/initd/booth-arbitrator

    env
    string instance_name -- booth instance name
    """
    external.ensure_is_systemd()
    booth_env = env.get_booth_env(instance_name)
    _ensure_live_env(env, booth_env)
    instance_name = booth_env.instance_name
    try:
        external.start_service(env.cmd_runner(), "booth", instance_name)
    except external.StartServiceError as e:
        failure = reports.messages.ServiceActionFailed(
            reports.const.SERVICE_ACTION_START,
            "booth",
            e.message,
            instance=instance_name,
        )
        raise LibraryError(ReportItem.error(failure)) from e
    success = reports.messages.ServiceActionSucceeded(
        reports.const.SERVICE_ACTION_START,
        "booth",
        instance=instance_name,
    )
    env.report_processor.report(ReportItem.info(success))
def _get_success_report(self, node_label):
    """Build the info report for a successful qdevice disable on a node."""
    message = reports.messages.ServiceActionSucceeded(
        reports.const.SERVICE_ACTION_DISABLE,
        "corosync-qdevice",
        node_label,
    )
    return ReportItem.info(message)
def stop_booth(env: LibraryEnvironment, instance_name=None):
    """
    Stop specified instance of booth service, systemd systems supported only.

    env
    string instance_name -- booth instance name
    """
    ensure_is_systemd(env.service_manager)
    booth_env = env.get_booth_env(instance_name)
    _ensure_live_env(env, booth_env)
    instance_name = booth_env.instance_name
    try:
        env.service_manager.stop("booth", instance=instance_name)
    except ManageServiceError as e:
        raise LibraryError(service_exception_to_report(e)) from e
    success = reports.messages.ServiceActionSucceeded(
        reports.const.SERVICE_ACTION_STOP,
        "booth",
        instance=instance_name,
    )
    env.report_processor.report(ReportItem.info(success))
def stop_booth(env: LibraryEnvironment, instance_name=None):
    """
    Stop specified instance of booth service, systemd systems supported only.

    env
    string instance_name -- booth instance name

    Raise LibraryError if the service fails to stop.
    """
    external.ensure_is_systemd()
    booth_env = env.get_booth_env(instance_name)
    _ensure_live_env(env, booth_env)
    instance_name = booth_env.instance_name
    try:
        external.stop_service(env.cmd_runner(), "booth", instance_name)
    except external.StopServiceError as e:
        # chain the original exception (`from e`) so the root cause is kept;
        # this was missing here while the sibling start_booth already does it
        raise LibraryError(
            ReportItem.error(
                reports.messages.ServiceActionFailed(
                    reports.const.SERVICE_ACTION_STOP,
                    "booth",
                    e.message,
                    instance=instance_name,
                )
            )
        ) from e
    env.report_processor.report(
        ReportItem.info(
            reports.messages.ServiceActionSucceeded(
                reports.const.SERVICE_ACTION_STOP,
                "booth",
                instance=instance_name,
            )
        )
    )
def _process_response(self, response):
    """
    Check one node's response to a "is corosync running" query.

    A communication failure, an unparsable response, or a missing
    "corosync" key each yield a "could not check" report using the
    configured failure severity; otherwise the node is reported as
    running / not running corosync.
    """
    report_item = self._get_response_report(response)
    node_label = response.request.target.label
    if report_item is not None:
        # communication failed: report it alongside the check error
        self._report_list([
            report_item,
            ReportItem(
                severity=ReportItemSeverity(
                    self._failure_severity,
                    self._failure_forceable,
                ),
                message=(reports.messages.CorosyncNotRunningCheckNodeError(
                    node_label,
                )),
            ),
        ])
        return
    try:
        status = response.data
        if not json.loads(status)["corosync"]:
            # corosync not running is the desired state here -> info
            report_item = ReportItem.info(
                reports.messages.CorosyncNotRunningOnNode(node_label),
            )
        else:
            report_item = ReportItem.error(
                reports.messages.CorosyncRunningOnNode(node_label),
            )
    except (KeyError, json.JSONDecodeError):
        # malformed response -- fall back to "could not check"
        report_item = ReportItem(
            severity=ReportItemSeverity(
                self._failure_severity,
                self._failure_forceable,
            ),
            message=reports.messages.CorosyncNotRunningCheckNodeError(
                node_label,
            ),
        )
    self._report(report_item)
def before(self):
    """Report that enabling the cluster on all target nodes has started."""
    labels = sorted(self._target_label_list)
    self._report(
        ReportItem.info(reports.messages.ClusterEnableStarted(labels))
    )
def _service_stop(
    report_processor: ReportProcessor,
    service_manager: ServiceManagerInterface,
    service: str,
) -> None:
    """
    Stop the quorum device service, reporting start and success.

    Raise LibraryError if the service manager fails to stop the service.
    """
    started = reports.messages.ServiceActionStarted(
        reports.const.SERVICE_ACTION_STOP, "quorum device"
    )
    report_processor.report(ReportItem.info(started))
    try:
        service_manager.stop(service)
    except ManageServiceError as e:
        raise LibraryError(service_exception_to_report(e)) from e
    succeeded = reports.messages.ServiceActionSucceeded(
        reports.const.SERVICE_ACTION_STOP, "quorum device"
    )
    report_processor.report(ReportItem.info(succeeded))
def _process_response(self, response):
    """
    Report the outcome of enabling corosync-qdevice on one node.

    A node with corosync disabled answers with a fixed message; that is
    reported as the action being skipped rather than succeeded.
    """
    report = self._get_response_report(response)
    node_label = response.request.target.label
    if report is None:
        if response.data == "corosync is not enabled, skipping":
            message = reports.messages.ServiceActionSkipped(
                reports.const.SERVICE_ACTION_ENABLE,
                "corosync-qdevice",
                "corosync is not enabled",
                node_label,
            )
        else:
            message = reports.messages.ServiceActionSucceeded(
                reports.const.SERVICE_ACTION_ENABLE,
                "corosync-qdevice",
                node_label,
            )
        report = ReportItem.info(message)
    self._report(report)
def set_up_client_certificates(
    runner,
    reporter,
    communicator_factory,
    qnetd_target,
    cluster_name,
    cluster_nodes_target_list,
    skip_offline_nodes,
    allow_skip_offline=True,
):
    """
    Set up cluster nodes for using qdevice model net.

    CommandRunner runner -- command runner instance
    ReportProcessor reporter -- report processor instance
    NodeCommunicatorFactory communicator_factory -- communicator factory
        instance
    Target qnetd_target -- qdevice provider (qnetd host)
    string cluster_name -- name of the cluster to which qdevice is being added
    list cluster_nodes_target_list -- list of cluster nodes targets
    bool skip_offline_nodes -- continue even if not all nodes are accessible
    bool allow_skip_offline -- enables forcing errors by skip_offline_nodes
    """
    reporter.report(
        ReportItem.info(
            reports.messages.QdeviceCertificateDistributionStarted()
        )
    )
    # get qnetd CA certificate
    com_cmd = qdevice_net_com.GetCaCert(reporter)
    com_cmd.set_targets([qnetd_target])
    qnetd_ca_cert = run_and_raise(
        communicator_factory.get_communicator(), com_cmd
    )[0][1]
    # init certificate storage on all nodes
    com_cmd = qdevice_net_com.ClientSetup(
        reporter, qnetd_ca_cert, skip_offline_nodes, allow_skip_offline
    )
    com_cmd.set_targets(cluster_nodes_target_list)
    run_and_raise(communicator_factory.get_communicator(), com_cmd)
    # create client certificate request
    cert_request = client_generate_certificate_request(runner, cluster_name)
    # sign the request on qnetd host
    com_cmd = qdevice_net_com.SignCertificate(reporter)
    com_cmd.add_request(qnetd_target, cert_request, cluster_name)
    signed_certificate = run_and_raise(
        communicator_factory.get_communicator(), com_cmd
    )[0][1]
    # transform the signed certificate to pk12 format which can be sent to
    # nodes
    pk12 = client_cert_request_to_pk12(runner, signed_certificate)
    # distribute final certificate to nodes
    com_cmd = qdevice_net_com.ClientImportCertificateAndKey(
        reporter, pk12, skip_offline_nodes, allow_skip_offline
    )
    com_cmd.set_targets(cluster_nodes_target_list)
    run_and_raise(communicator_factory.get_communicator(), com_cmd)
def _process_response(self, response):
    """
    Report the cluster-destroy result for one node.

    Communication failures are reported as warnings and the node is added
    to self._unreachable_nodes.
    """
    failure = response_to_report_item(
        response, severity=reports.ReportItemSeverity.WARNING
    )
    node_label = response.request.target.label
    if failure is not None:
        self._report(failure)
        self._unreachable_nodes.append(node_label)
    else:
        self._report(
            ReportItem.info(
                reports.messages.ClusterDestroySuccess(node_label)
            )
        )
def test_local_watchdog(lib_env, watchdog=None):
    """
    Test local watchdog device by triggering it. System reset is expected.
    If watchdog is not specified, available watchdog will be used if there
    is only one.

    lib_env LibraryEnvironment
    watchdog string -- watchdog to trigger
    """
    # warn the user first: triggering the watchdog resets the machine
    lib_env.report_processor.report(
        ReportItem.info(reports.messages.SystemWillReset())
    )
    sbd.test_watchdog(lib_env.cmd_runner(), watchdog)
def wait_for_idle(self, timeout: int = 0) -> None:
    """
    Wait for the cluster to settle down.

    timeout -- timeout in seconds; if less than 0 the wait is skipped,
        if 0 wait indefinitely
    """
    if timeout < 0:
        # waiting explicitly turned off
        return
    self.report_processor.report(
        ReportItem.info(reports.messages.WaitForIdleStarted(timeout))
    )
    # delegate to the module-level wait_for_idle helper
    wait_for_idle(self.cmd_runner(), timeout)
def _process_response(self, response):
    """
    Handle an auth-check response for one host.

    A successful connection means the host is already authorized; a
    failed one is recorded so the caller can re-authenticate the host.
    """
    report = response_to_report_item(
        response, severity=ReportItemSeverity.INFO
    )
    host_name = response.request.target.label
    if report is not None:
        # If we cannot connect it may be because a node's address and / or
        # port is not correct. Since these are part of authentication info
        # we tell we're not authorized.
        self._not_authorized_host_name_list.append(host_name)
    else:
        report = ReportItem.info(
            reports.messages.HostAlreadyAuthorized(host_name)
        )
    self._report(report)
def _push_corosync_conf_live(
    self,
    target_list,
    corosync_conf_data,
    need_stopped_cluster,
    need_qdevice_reload,
    skip_offline_nodes,
):
    """
    Distribute corosync.conf to the given targets and reload daemons.

    target_list -- nodes to push the config to
    corosync_conf_data -- corosync.conf text to distribute
    need_stopped_cluster -- if True, verify corosync is offline first and
        skip the corosync reload afterwards
    need_qdevice_reload -- if True, restart corosync-qdevice on all nodes
    skip_offline_nodes -- continue even if some nodes are unreachable
    """
    # TODO
    # * check for online nodes and run all commands on them only
    # * if those commands fail, exit with an error
    # * add support for allow_skip_offline=False
    # * use simple report processor
    # Correct reloading is done in pcs.lib.cluster.remove_nodes for example.

    # Check if the cluster is stopped when needed
    if need_stopped_cluster:
        com_cmd = CheckCorosyncOffline(
            self.report_processor, skip_offline_nodes
        )
        com_cmd.set_targets(target_list)
        run_and_raise(self.get_node_communicator(), com_cmd)

    # Distribute corosync.conf
    com_cmd = DistributeCorosyncConf(
        self.report_processor, corosync_conf_data, skip_offline_nodes
    )
    com_cmd.set_targets(target_list)
    run_and_raise(self.get_node_communicator(), com_cmd)

    # Reload corosync
    if not need_stopped_cluster:
        # If cluster must be stopped then we cannot reload corosync because
        # the cluster is stopped. If it is not stopped, we do not even get
        # here.
        com_cmd = ReloadCorosyncConf(self.report_processor)
        com_cmd.set_targets(target_list)
        run_and_raise(self.get_node_communicator(), com_cmd)

    # Reload qdevice if needed
    if need_qdevice_reload:
        self.report_processor.report(
            ReportItem.info(reports.messages.QdeviceClientReloadStarted())
        )
        # stop/start errors are collected rather than raised immediately so
        # both phases run on every node before bailing out
        com_cmd = qdevice.Stop(self.report_processor, skip_offline_nodes)
        com_cmd.set_targets(target_list)
        run(self.get_node_communicator(), com_cmd)
        has_errors = com_cmd.has_errors
        com_cmd = qdevice.Start(self.report_processor, skip_offline_nodes)
        com_cmd.set_targets(target_list)
        run(self.get_node_communicator(), com_cmd)
        has_errors = has_errors or com_cmd.has_errors
        if has_errors:
            raise LibraryError()
def qdevice_destroy(lib_env: LibraryEnvironment, model, proceed_if_used=False):
    """
    Stop and disable qdevice on local host and remove its configuration

    string model qdevice model to destroy
    bool proceed_if_used destroy qdevice even if it is used by clusters
    """
    _check_model(model)
    # refuse (unless forced) to destroy a qdevice clusters still rely on
    _check_qdevice_not_used(
        lib_env.report_processor,
        lib_env.cmd_runner(),
        model,
        proceed_if_used,
    )
    # stop and disable the service before removing its configuration
    _service_stop(lib_env, qdevice_net.qdevice_stop)
    _service_disable(lib_env, qdevice_net.qdevice_disable)
    qdevice_net.qdevice_destroy()
    lib_env.report_processor.report(
        ReportItem.info(reports.messages.QdeviceDestroySuccess(model))
    )
def _service_kill(lib_env: LibraryEnvironment, func):
    """
    Kill quorum device services by calling ``func``, reporting success.

    lib_env -- provides the report processor and the command runner
    func -- callable taking a CommandRunner; performs the actual kill

    Raise LibraryError if any of the services could not be killed.
    """
    try:
        func(lib_env.cmd_runner())
    except external.KillServicesError as e:
        # chain the original exception (`from e`) so the root cause is
        # preserved; one error report per failed service
        raise LibraryError(
            *[
                ReportItem.error(
                    reports.messages.ServiceActionFailed(
                        reports.const.SERVICE_ACTION_KILL,
                        service,
                        e.message,
                    )
                )
                for service in e.service
            ]
        ) from e
    lib_env.report_processor.report(
        ReportItem.info(
            reports.messages.ServiceActionSucceeded(
                reports.const.SERVICE_ACTION_KILL, "quorum device"
            )
        )
    )
def _process_response(self, response):
    """
    Validate one node's SBD device / watchdog check results.

    Parses the JSON response and reports errors for: SBD not installed,
    watchdog missing or unsupported, and devices which do not exist or are
    not block devices. A success is reported only when no error was found.
    """
    report_item = response_to_report_item(response)
    if report_item:
        self._report(report_item)
        return
    report_list = []
    node_label = response.request.target.label
    try:
        data = json.loads(response.data)
        if not data["sbd"]["installed"]:
            report_list.append(
                ReportItem.error(
                    reports.messages.SbdNotInstalled(node_label)
                )
            )
        if "watchdog" in data:
            if data["watchdog"]["exist"]:
                # a missing "is_supported" key is treated as supported
                if not data["watchdog"].get("is_supported", True):
                    report_list.append(
                        ReportItem.error(
                            reports.messages.SbdWatchdogNotSupported(
                                node_label, data["watchdog"]["path"]
                            )
                        )
                    )
            else:
                report_list.append(
                    ReportItem.error(
                        reports.messages.WatchdogNotFound(
                            node_label, data["watchdog"]["path"]
                        )
                    )
                )
        for device in data.get("device_list", []):
            if not device["exist"]:
                report_list.append(
                    ReportItem.error(
                        reports.messages.SbdDeviceDoesNotExist(
                            device["path"], node_label
                        )
                    )
                )
            elif not device["block_device"]:
                report_list.append(
                    ReportItem.error(
                        reports.messages.SbdDeviceIsNotBlockDevice(
                            device["path"], node_label
                        )
                    )
                )
        # TODO maybe we can check whenever device is initialized by sbd
        # (by running 'sbd -d <dev> dump;')
    except (ValueError, KeyError, TypeError):
        report_list.append(
            ReportItem.error(
                reports.messages.InvalidResponseFormat(node_label)
            )
        )
    if report_list:
        self._report_list(report_list)
    else:
        self._report(
            ReportItem.info(
                reports.messages.SbdCheckSuccess(
                    response.request.target.label
                )
            )
        )
def _service_disable(lib_env: LibraryEnvironment, func):
    """
    Disable the quorum device service by calling ``func``.

    lib_env -- provides the report processor and the command runner
    func -- callable taking a CommandRunner; performs the actual disable

    Raise LibraryError if the service could not be disabled.
    """
    try:
        func(lib_env.cmd_runner())
    except external.DisableServiceError as e:
        # chain the original exception (`from e`) so the root cause is
        # preserved in tracebacks; consistent with _service_stop
        raise LibraryError(
            ReportItem.error(
                reports.messages.ServiceActionFailed(
                    reports.const.SERVICE_ACTION_DISABLE,
                    e.service,
                    e.message,
                )
            )
        ) from e
    lib_env.report_processor.report(
        ReportItem.info(
            reports.messages.ServiceActionSucceeded(
                reports.const.SERVICE_ACTION_DISABLE, "quorum device"
            )
        )
    )
def qdevice_setup(lib_env: LibraryEnvironment, model, enable, start):
    """
    Initialize qdevice on local host with specified model

    string model qdevice model to initialize
    bool enable make qdevice service start on boot
    bool start start qdevice now
    """
    _check_model(model)
    qdevice_net.qdevice_setup(lib_env.cmd_runner())
    lib_env.report_processor.report(
        ReportItem.info(
            reports.messages.QdeviceInitializationSuccess(model)
        )
    )
    # optionally enable at boot and/or start right away
    if enable:
        _service_enable(lib_env, qdevice_net.qdevice_enable)
    if start:
        _service_start(lib_env, qdevice_net.qdevice_start)
def _process_response(self, response):
    """
    Process one node's response to a booth config distribution request.

    The node returns lists of saved, already-existing and failed files;
    each one is turned into the appropriate report. Already-existing files
    are reported with a forcible severity governed by
    self._rewrite_existing.
    """
    report = self._get_response_report(response)
    if report is not None:
        self._report(report)
        return
    target = response.request.target
    try:
        parsed_data = json.loads(response.data)
        self._report(
            ReportItem.info(
                reports.messages.BoothConfigAcceptedByNode(
                    node=target.label,
                    name_list=sorted(parsed_data["saved"]),
                )
            )
        )
        for filename in list(parsed_data["existing"]):
            self._report(
                ReportItem(
                    severity=reports.item.get_severity(
                        reports.codes.FORCE_FILE_OVERWRITE,
                        self._rewrite_existing,
                    ),
                    message=reports.messages.FileAlreadyExists(
                        # TODO specify file type; this will be overhauled
                        # to a generic file transport framework anyway
                        "",
                        filename,
                        node=target.label,
                    ),
                )
            )
        for file, reason in dict(parsed_data["failed"]).items():
            self._report(
                ReportItem.error(
                    reports.messages.BoothConfigDistributionNodeError(
                        target.label,
                        reason,
                        file,
                    )
                )
            )
    except (KeyError, TypeError, ValueError):
        # the node returned data in an unexpected shape
        self._report(
            ReportItem.error(
                reports.messages.InvalidResponseFormat(target.label)
            )
        )
def get_cib(
    self,
    minimal_version: Optional[Version] = None,
    nice_to_have_version: Optional[Version] = None,
) -> _Element:
    """
    Load the CIB, upgrading its schema when a newer version is requested.

    minimal_version -- upgrade to at least this version, failing if the
        upgrade cannot meet it
    nice_to_have_version -- attempt this upgrade as well, but do not fail
        if it is not met

    Raise AssertionError if the CIB has already been loaded.
    """
    if self.__loaded_cib_diff_source is not None:
        raise AssertionError("CIB has already been loaded")
    self.__loaded_cib_diff_source = get_cib_xml(self.cmd_runner())
    self.__loaded_cib_to_modify = get_cib(self.__loaded_cib_diff_source)
    if (
        nice_to_have_version is not None
        and minimal_version is not None
        and minimal_version >= nice_to_have_version
    ):
        # the mandatory version subsumes the optional one
        nice_to_have_version = None
    # try the optional upgrade first, then the mandatory one
    for version, mandatory in (
        (nice_to_have_version, False),
        (minimal_version, True),
    ):
        if version is not None:
            upgraded_cib, was_upgraded = ensure_cib_version(
                self.cmd_runner(),
                self.__loaded_cib_to_modify,
                version,
                fail_if_version_not_met=mandatory,
            )
            if was_upgraded:
                self.__loaded_cib_to_modify = upgraded_cib
                self.__loaded_cib_diff_source = etree_to_str(upgraded_cib)
                # report the upgrade at most once per environment
                if not self._cib_upgrade_reported:
                    self.report_processor.report(
                        ReportItem.info(
                            reports.messages.CibUpgradeSuccessful()
                        )
                    )
                    self._cib_upgrade_reported = True
    self.__loaded_cib_diff_source_feature_set = get_cib_crm_feature_set(
        self.__loaded_cib_to_modify, none_if_missing=True
    ) or Version(0, 0, 0)
    return self.__loaded_cib_to_modify
def _push_corosync_conf_live(
    self,
    target_list,
    corosync_conf_data,
    need_stopped_cluster,
    need_qdevice_reload,
    skip_offline_nodes,
):
    """
    Distribute corosync.conf to the given targets and reload daemons.

    target_list -- nodes to push the config to
    corosync_conf_data -- corosync.conf text to distribute
    need_stopped_cluster -- if True, verify corosync is offline first and
        skip the corosync reload afterwards
    need_qdevice_reload -- if True, restart corosync-qdevice on all nodes
    skip_offline_nodes -- continue even if some nodes are unreachable
    """
    # Check if the cluster is stopped when needed
    if need_stopped_cluster:
        com_cmd = CheckCorosyncOffline(
            self.report_processor, skip_offline_nodes
        )
        com_cmd.set_targets(target_list)
        run_and_raise(self.get_node_communicator(), com_cmd)
    # Distribute corosync.conf
    com_cmd = DistributeCorosyncConf(
        self.report_processor, corosync_conf_data, skip_offline_nodes
    )
    com_cmd.set_targets(target_list)
    run_and_raise(self.get_node_communicator(), com_cmd)
    # Reload corosync
    if not need_stopped_cluster:
        # If cluster must be stopped then we cannot reload corosync because
        # the cluster is stopped. If it is not stopped, we do not even get
        # here.
        com_cmd = ReloadCorosyncConf(self.report_processor)
        com_cmd.set_targets(target_list)
        run_and_raise(self.get_node_communicator(), com_cmd)
    # Reload qdevice if needed
    if need_qdevice_reload:
        self.report_processor.report(
            ReportItem.info(reports.messages.QdeviceClientReloadStarted())
        )
        # stop/start errors are collected rather than raised immediately so
        # both phases run on every node before bailing out
        com_cmd = qdevice.Stop(self.report_processor, skip_offline_nodes)
        com_cmd.set_targets(target_list)
        run(self.get_node_communicator(), com_cmd)
        has_errors = com_cmd.has_errors
        com_cmd = qdevice.Start(self.report_processor, skip_offline_nodes)
        com_cmd.set_targets(target_list)
        run(self.get_node_communicator(), com_cmd)
        has_errors = has_errors or com_cmd.has_errors
        if has_errors:
            raise LibraryError()
def check_is_without_duplication(
    report_processor: ReportProcessor,
    constraint_section: _Element,
    element: _Element,
    are_duplicate: Callable[[_Element, _Element], bool],
    export_element: Callable[[_Element], Dict[str, Any]],
    duplication_allowed: bool = False,
) -> None:
    """
    Report constraints duplicate to the given element and raise unless the
    duplication is allowed (forced).

    report_processor -- collects the reports
    constraint_section -- element whose subtree is searched for duplicates
    element -- the constraint being checked
    are_duplicate -- predicate deciding whether two constraints duplicate
        each other
    export_element -- converts a constraint element to a plain dict for the
        report payload
    duplication_allowed -- if True, duplicates are reported but not fatal
    """
    duplicate_element_list = [
        duplicate_element
        for duplicate_element in cast(
            # The xpath method has a complicated return value, but we know our
            # xpath expression returns only elements.
            List[_Element],
            constraint_section.xpath(
                ".//*[local-name()=$tag_name]", tag_name=element.tag
            ),
        )
        if (
            element is not duplicate_element
            and are_duplicate(element, duplicate_element)
        )
    ]
    if not duplicate_element_list:
        return
    # an info report listing the duplicates plus a forcible error/warning
    if report_processor.report_list([
        ReportItem.info(
            reports.messages.DuplicateConstraintsList(
                element.tag,
                [
                    export_element(duplicate_element)
                    for duplicate_element in duplicate_element_list
                ],
            )
        ),
        ReportItem(
            severity=reports.item.get_severity(
                reports.codes.FORCE,
                duplication_allowed,
            ),
            message=reports.messages.DuplicateConstraintsExist([
                str(duplicate.attrib["id"])
                for duplicate in duplicate_element_list
            ]),
        ),
    ]).has_errors:
        raise LibraryError()
def _process_response(self, response):
    """
    Report whether one node accepted the distributed corosync.conf.

    On failure, the communication report is emitted together with a
    node-specific distribution error whose severity and forceability are
    taken from the command's configuration.
    """
    report_item = self._get_response_report(response)
    node_label = response.request.target.label
    if report_item is None:
        self._report(
            ReportItem.info(
                reports.messages.CorosyncConfigAcceptedByNode(node_label)
            )
        )
        return
    failure_severity = ReportItemSeverity(
        self._failure_severity,
        self._failure_forceable,
    )
    node_error = reports.messages.CorosyncConfigDistributionNodeError(
        node_label,
    )
    self._report_list(
        [
            report_item,
            ReportItem(severity=failure_severity, message=node_error),
        ]
    )
def get_cib(self, minimal_version: Optional[Version] = None) -> Element:
    """
    Load the CIB, upgrading its schema to minimal_version when needed.

    minimal_version -- minimal required CIB schema version, or None to
        accept the CIB as-is

    Raise AssertionError if the CIB has already been loaded.
    """
    if self.__loaded_cib_diff_source is not None:
        raise AssertionError("CIB has already been loaded")
    self.__loaded_cib_diff_source = get_cib_xml(self.cmd_runner())
    self.__loaded_cib_to_modify = get_cib(self.__loaded_cib_diff_source)
    if minimal_version is not None:
        upgraded_cib = ensure_cib_version(
            self.cmd_runner(),
            self.__loaded_cib_to_modify,
            minimal_version,
        )
        # ensure_cib_version returns None when no upgrade was necessary
        if upgraded_cib is not None:
            self.__loaded_cib_to_modify = upgraded_cib
            self.__loaded_cib_diff_source = etree_to_str(upgraded_cib)
            # report the upgrade at most once per environment
            if not self._cib_upgrade_reported:
                self.report_processor.report(
                    ReportItem.info(
                        reports.messages.CibUpgradeSuccessful()
                    )
                )
                self._cib_upgrade_reported = True
    self.__loaded_cib_diff_source_feature_set = get_cib_crm_feature_set(
        self.__loaded_cib_to_modify, none_if_missing=True
    ) or Version(0, 0, 0)
    return self.__loaded_cib_to_modify