def Components(self):
    """Return the components that are eligible to be targeted by tests.

    On the first call (while ``self.fullcomplist`` is still empty) the table
    of Process objects is built from the pattern templates.  Every call then
    filters that table: components being profiled by valgrind are skipped
    when valgrind tests are enabled, and ``stonith-ng`` is skipped unless
    fencing is enabled.
    """

    def patterns(key):
        # Pattern list registered under `key` for this cluster manager.
        return self.templates.get_component(self.name, key)

    if not self.fullcomplist:
        for daemon in ("cib", "lrmd", "crmd", "attrd"):
            self.fullcomplist[daemon] = Process(
                self, daemon,
                pats=patterns(daemon),
                badnews_ignore=patterns("%s-ignore" % daemon),
                common_ignore=patterns("common-ignore"))

        # pengine uses dc_pats instead of pats
        self.fullcomplist["pengine"] = Process(
            self, "pengine",
            dc_pats=patterns("pengine"),
            badnews_ignore=patterns("pengine-ignore"),
            common_ignore=patterns("common-ignore"))

        # stonith-ng's process name is different from its component name
        self.fullcomplist["stonith-ng"] = Process(
            self, "stonith-ng", process="stonithd",
            pats=patterns("stonith"),
            badnews_ignore=patterns("stonith-ignore"),
            common_ignore=patterns("common-ignore"))

        # add (or replace) extra components
        self.fullcomplist["corosync"] = Process(
            self, "corosync",
            pats=patterns("corosync"),
            badnews_ignore=patterns("corosync-ignore"),
            common_ignore=patterns("common-ignore"))

    # Processes running under valgrind can't be shot with
    # "killall -9 processname", so they are left out of the result.
    profiled = self.Env["valgrind-procs"].split()

    selected = []
    for name, component in list(self.fullcomplist.items()):
        if self.Env["valgrind-tests"] and name in profiled:
            self.log(
                "Filtering %s from the component list as it is being profiled by valgrind"
                % name)
            continue
        if name == "stonith-ng" and not self.Env["DoFencing"]:
            continue
        selected.append(component)

    return selected
def Components(self):
    """Return the components that are eligible to be targeted by tests.

    The Process table in ``self.fullcomplist`` is created lazily, the first
    time this is called with an empty table.  The returned list omits
    components profiled by valgrind (when valgrind tests are enabled) and
    omits ``pacemaker-fenced`` when fencing is disabled.
    """

    def patterns(key):
        # Pattern list registered under `key` for this cluster manager.
        return self.templates.get_component(self.name, key)

    if not self.fullcomplist:
        standard_daemons = (
            "pacemaker-based",
            "pacemaker-controld",
            "pacemaker-attrd",
            "pacemaker-execd",
            "pacemaker-fenced",
        )
        for daemon in standard_daemons:
            self.fullcomplist[daemon] = Process(
                self, daemon,
                pats=patterns(daemon),
                badnews_ignore=patterns("%s-ignore" % daemon),
                common_ignore=patterns("common-ignore"))

        # the scheduler uses dc_pats instead of pats
        self.fullcomplist["pacemaker-schedulerd"] = Process(
            self, "pacemaker-schedulerd",
            dc_pats=patterns("pacemaker-schedulerd"),
            badnews_ignore=patterns("pacemaker-schedulerd-ignore"),
            common_ignore=patterns("common-ignore"))

        # add (or replace) extra components
        self.fullcomplist["corosync"] = Process(
            self, "corosync",
            pats=patterns("corosync"),
            badnews_ignore=patterns("corosync-ignore"),
            common_ignore=patterns("common-ignore"))

    # Processes running under valgrind can't be shot with
    # "killall -9 processname", so they are left out of the result.
    profiled = self.Env["valgrind-procs"].split()

    selected = []
    for name, component in list(self.fullcomplist.items()):
        if self.Env["valgrind-tests"] and name in profiled:
            self.log(
                "Filtering %s from the component list as it is being profiled by valgrind"
                % name)
            continue
        if name == "pacemaker-fenced" and not self.Env["DoFencing"]:
            continue
        selected.append(component)

    return selected
def ais_components(self):
    """Build (once) and filter the table of core daemon components.

    When ``self.fullcomplist`` is empty it is populated with one Process per
    daemon, using pattern lists from ``self.templates``.  The return value is
    a fresh list of the table's entries, minus anything being profiled by
    valgrind (if valgrind tests are on) and minus ``stonith-ng`` when fencing
    is disabled.
    """

    def patterns(key):
        # Pattern list registered under `key` for this cluster manager.
        return self.templates.get_component(self.name, key)

    if not len(self.fullcomplist.keys()):
        for daemon in ("cib", "lrmd", "crmd", "attrd"):
            self.fullcomplist[daemon] = Process(
                self, daemon,
                pats=patterns(daemon),
                badnews_ignore=patterns("%s-ignore" % daemon),
                common_ignore=patterns("common-ignore"))

        # pengine is given dc_pats rather than pats
        self.fullcomplist["pengine"] = Process(
            self, "pengine",
            dc_pats=patterns("pengine"),
            badnews_ignore=patterns("pengine-ignore"),
            common_ignore=patterns("common-ignore"))

        # component "stonith-ng" runs as process "stonithd"
        self.fullcomplist["stonith-ng"] = Process(
            self, "stonith-ng", process="stonithd",
            pats=patterns("stonith"),
            badnews_ignore=patterns("stonith-ignore"),
            common_ignore=patterns("common-ignore"))

    profiled = self.Env["valgrind-procs"].split()

    selected = []
    for name, component in self.fullcomplist.items():
        if self.Env["valgrind-tests"] and name in profiled:
            # Processes running under valgrind can't be shot with
            # "killall -9 processname"
            self.log(
                "Filtering %s from the component list as it is being profiled by valgrind"
                % name)
            continue
        if name == "stonith-ng" and not self.Env["DoFencing"]:
            continue
        selected.append(component)

    return selected
def Components(self):
    """Return the component list extended with an ``aisexec`` Process.

    ``self.ais_components()`` is called first; this method then appends an
    ``aisexec`` Process to ``self.complist``.  The Process's ignore list is
    the aisexec-specific patterns followed by ``self.common_ignore``.

    Returns:
        ``self.complist``, including the appended ``aisexec`` entry.  The
        list is returned explicitly so that callers receive it rather than
        an implicit ``None``, in line with the other ``Components``
        variants that end with ``return self.complist``.
    """
    self.ais_components()

    aisexec_ignore = [
        "(ERROR|error): ais_dispatch: Receiving message .* failed",
        "crmd.*I_ERROR.*crmd_cib_connection_destroy",
        "cib.*(ERROR|error): cib_ais_destroy: AIS connection terminated",
        #"crmd.*(ERROR|error): crm_ais_destroy: AIS connection terminated",
        "crmd.*do_exit: Could not recover from internal error",
        "crmd.*I_TERMINATE.*do_recover",
        "attrd.*attrd_ais_destroy: Lost connection to OpenAIS service!",
        "stonithd.*(ERROR|error): AIS connection terminated",
    ]
    aisexec_ignore.extend(self.common_ignore)

    self.complist.append(
        Process(
            self, "aisexec",
            pats=[
                "(ERROR|error): ais_dispatch: AIS connection failed",
                "crmd.*(ERROR|error): do_exit: Could not recover from internal error",
                "pengine.*Scheduling Node .* for STONITH",
                "stonithd.*requests a STONITH operation RESET on node",
                "stonithd.*Succeeded to STONITH the node",
            ],
            badnews_ignore=aisexec_ignore))

    return self.complist
def Components(self):
    """Return the component list with a ``corosync`` Process added.

    A ``corosync`` Process is built from the pattern templates and handed to
    ``self.ais_components`` through its ``extra`` keyword; whatever that call
    returns is returned unchanged.
    """

    def patterns(key):
        # Pattern list registered under `key` for this cluster manager.
        return self.templates.get_component(self.name, key)

    corosync = Process(
        self, "corosync",
        pats=patterns("corosync"),
        badnews_ignore=patterns("corosync-ignore"),
        common_ignore=patterns("common-ignore"))

    return self.ais_components(extra={"corosync": corosync})
def Components(self):
    """Return ``self.complist`` after appending a ``corosync`` Process.

    ``self.ais_components()`` is invoked first.  The corosync Process is
    created with the patterns listed below as ``pats``, with
    ``ignored_errors`` as its ``badnews_ignore`` and with
    ``self.common_ignore`` as its ``common_ignore``.
    """
    self.ais_components()

    # Passed as badnews_ignore for the corosync Process.
    ignored_errors = [
        r"error: crmd_quorum_destroy: connection terminated",
        r"error: lrm_state_verify_stopped: .* resources were active at lrm disconnect",
        r"pacemakerd.*error: pcmk_child_exit: Child process .* exited",
        r"error: send_cpg_message: Sending message via cpg FAILED",
        r"error: verify_stopped: Resource .* was active at shutdown. You may ignore this error if it is unmanaged.",
        r"error: pcmk_cpg_dispatch: Connection to the CPG API failed:",
        r"error: pcmk_cman_dispatch: Connection to cman failed: -1",
        r"error: crm_ipc_read: Connection to cib_shm failed",
        r"error: mainloop_gio_callback: Connection to .* closed",
        r"crmd_cib_connection_destroys:",
        r"crmd.*error: do_recover: Action A_RECOVER .* not supported",
        r"error: do_log: FSA: Input I_TERMINATE.*do_recover",
        r"error: cib_ais_destroy: Corosync connection lost! Exiting.",
        r"attrd.*error: attrd_cib_connection_destroy: Connection to the CIB terminated...",
        r"error: send_ais_text: Sending message .* via cpg: FAILED",
        r"error: crmd_quorum_destroy: connection terminated",
        r"error: lrm_state_verify_stopped: .* resources were active at lrm disconnect.",
        r"error: crm_ipc_read: Connection to stonith-ng failed",
        r"crit: tengine_stonith_connection_destroy: Fencing daemon connection failed",
        r"error: stonith_connection_destroy_cb: LRMD lost STONITH connection",
        r"error: stonith_connection_failed: STONITH connection failed",
        r"error: te_connect_stonith: Sign-in failed: triggered a retry",
        r"error: process_lrm_event: LRM operation Fencing.*",
        r"error: do_log: FSA: Input I_ERROR from crmd_cib_connection_destroy.* received in state",
        r"error: do_log: FSA: Input I_ERROR from do_shutdown_req.* received in state",
        r"do_state_transition: State transition .* S_RECOVERY .*origin=crmd_cib_connection_destroy",
        r"do_state_transition: State transition .* S_RECOVERY .*origin=do_shutdown_req",
        r"crmd.*error: cib_native_perform_op_delegate: Couldn't perform cib_slave operation",
        r"crmd.*error: cib_native_perform_op_delegate: CIB disconnected",
    ]

    # Passed as pats for the corosync Process.
    failure_patterns = [
        r"pacemakerd.*error: cfg_connection_destroy: Connection destroyed",
        r"pacemakerd.*error: cpg_connection_destroy: Connection destroyed",
        r"attrd_ais_destroy: Lost connection to Corosync service!",
        r"stonith_peer_ais_destroy: AIS connection terminated",
        r"cib_ais_destroy: Corosync connection lost! Exiting.",
        r"crmd_ais_destroy: connection terminated",
        r"error: lrm_state_verify_stopped: .* resources were active at lrm disconnect",
        r"pengine.*Scheduling Node .* for STONITH",
        r"log_operation: Operation .* for host .* with device .* returned: 0",
        r"tengine_stonith_notify: Peer .* was terminated .*: OK",
    ]

    corosync = Process(
        self, "corosync",
        pats=failure_patterns,
        badnews_ignore=ignored_errors,
        common_ignore=self.common_ignore)
    self.complist.append(corosync)

    return self.complist
def Components(self):
    """Register a ``corosync`` Process and return the filtered components.

    ``self.ais_components()`` is called twice: once before the corosync entry
    is stored in ``self.fullcomplist`` (result discarded) and once afterwards,
    whose result is returned.
    """
    # Return value deliberately unused; presumably this call is what fills
    # self.fullcomplist before the corosync entry is added -- TODO confirm.
    self.ais_components()

    def patterns(key):
        # Pattern list registered under `key` for this cluster manager.
        return self.templates.get_component(self.name, key)

    name = "corosync"
    self.fullcomplist[name] = Process(
        self, name,
        pats=patterns(name),
        badnews_ignore=patterns("%s-ignore" % name),
        common_ignore=patterns("common-ignore"))

    return self.ais_components()
def Components(self):
    """Return ``self.complist`` after appending a ``corosync`` Process.

    ``self.ais_components()`` is invoked first.  The corosync Process is
    created with ``failure_patterns`` as ``pats``, ``ignored_errors`` as
    ``badnews_ignore`` and ``self.common_ignore`` as ``common_ignore``.
    """
    self.ais_components()

    # Passed as badnews_ignore for the corosync Process.
    ignored_errors = [
        r"error: pcmk_cpg_dispatch: Connection to the CPG API failed: Library error",
        r"pacemakerd.*error: pcmk_child_exit: Child process .* exited",
        r"cib.*error: cib_cs_destroy: Corosync connection lost",
        r"attrd.*error: attrd_cib_connection_destroy: Connection to the CIB terminated",
        r"stonith-ng.*error: stonith_peer_cs_destroy: Corosync connection terminated",
        r"error: pcmk_child_exit: Child process cib .* exited: Invalid argument",
        r"error: pcmk_child_exit: Child process attrd .* exited: Transport endpoint is not connected",
        r"error: pcmk_child_exit: Child process crmd .* exited: Link has been severed",
        r"lrmd.*error: crm_ipc_read: Connection to stonith-ng failed",
        r"lrmd.*error: mainloop_gio_callback: Connection to stonith-ng.* closed",
        r"lrmd.*error: stonith_connection_destroy_cb: LRMD lost STONITH connection",
        r"crmd.*do_state_transition: State transition .* S_RECOVERY",
        r"crmd.*error: do_log: FSA: Input I_ERROR",
        r"crmd.*error: do_log: FSA: Input I_TERMINATE",
        r"crmd.*error: pcmk_cman_dispatch: Connection to cman failed",
        r"crmd.*error: crmd_fast_exit: Could not recover from internal error",
        r"error: crm_ipc_read: Connection to cib_shm failed",
        r"error: mainloop_gio_callback: Connection to cib_shm.* closed",
    ]

    # Passed as pats for the corosync Process.
    failure_patterns = [
        r"pacemakerd.*error: cfg_connection_destroy: Connection destroyed",
        r"pacemakerd.*error: mcp_cpg_destroy: Connection destroyed",
        r"attrd_cs_destroy: Lost connection to Corosync service!",
        r"stonith_peer_cs_destroy: Corosync connection terminated",
        r"cib_cs_destroy: Corosync connection lost! Exiting.",
        r"crmd_(cs|quorum)_destroy: connection terminated",
        r"pengine.*Scheduling Node .* for STONITH",
        r"tengine_stonith_notify: Peer .* was terminated .*: OK",
    ]

    corosync = Process(
        self, "corosync",
        pats=failure_patterns,
        badnews_ignore=ignored_errors,
        common_ignore=self.common_ignore)
    self.complist.append(corosync)

    return self.complist
def Components(self):
    """Build and return the list of Process components for this cluster.

    Five daemon Processes (ccm, pacemaker-based, pacemaker-execd,
    pacemaker-controld, pacemaker-schedulerd) are always created, each with
    ``triggersreboot=self.fastfail``.  A ``stoniths`` Process is placed at the
    front of the list when ``self.Env["DoFencing"] == 1``.  When
    ``self.fastfail == 0`` a few extra "exited with status" patterns are
    appended to the ccm, based and execd pattern lists.
    """
    shared_ignore = [
        "Pending action:",
        "(ERROR|error): crm_log_message_adv:",
        "(ERROR|error): MSG: No message to dump",
        "pending LRM operations at shutdown",
        "Lost connection to the CIB manager",
        "Connection to the CIB terminated...",
        "Sending message to the CIB manager FAILED",
        "Action A_RECOVER .* not supported",
        "(ERROR|error): stonithd_op_result_ready: not signed on",
        "pingd.*(ERROR|error): send_update: Could not send update",
        "send_ipc_message: IPC Channel to .* is not connected",
        "unconfirmed_actions: Waiting on .* unconfirmed actions",
        "cib_native_msgready: Message pending on command channel",
        r": Performing A_EXIT_1 - forcefully exiting ",
        r"Resource .* was active at shutdown. You may ignore this error if it is unmanaged.",
    ]

    # Fencing-specific ignores followed by a copy of the shared ones.
    fencing_ignore = [
        r"Updating failcount for child_DoFencing",
        r"error.*: Fencer connection failed \(will retry\)",
        "pacemaker-execd.*(ERROR|error): stonithd_receive_ops_result failed.",
    ] + shared_ignore

    ccm_proc = Process(
        self, "ccm",
        triggersreboot=self.fastfail,
        pats=[
            "State transition .* S_RECOVERY",
            "pacemaker-controld.*Action A_RECOVER .* not supported",
            r"pacemaker-controld.*: Input I_TERMINATE .*from do_recover",
            r"pacemaker-controld.*: Could not recover from internal error",
            "pacemaker-controld.*I_ERROR.*crmd_cib_connection_destroy",
            # these status numbers are likely wrong now
            r"pacemaker-controld.*exited with status 2",
            r"attrd.*exited with status 1",
            r"cib.*exited with status 2",
            "State transition S_STARTING -> S_PENDING",
        ],
        badnews_ignore=shared_ignore)

    based_proc = Process(
        self, "pacemaker-based",
        triggersreboot=self.fastfail,
        pats=[
            "State transition .* S_RECOVERY",
            "Lost connection to the CIB manager",
            "Connection to the CIB manager terminated",
            r"pacemaker-controld.*: Input I_TERMINATE .*from do_recover",
            "pacemaker-controld.*I_ERROR.*crmd_cib_connection_destroy",
            r"pacemaker-controld.*: Could not recover from internal error",
            # these status numbers are likely wrong now
            r"pacemaker-controld.*exited with status 2",
            r"attrd.*exited with status 1",
        ],
        badnews_ignore=shared_ignore)

    execd_proc = Process(
        self, "pacemaker-execd",
        triggersreboot=self.fastfail,
        pats=[
            "State transition .* S_RECOVERY",
            "LRM Connection failed",
            "pacemaker-controld.*I_ERROR.*lrm_connection_destroy",
            "State transition S_STARTING -> S_PENDING",
            r"pacemaker-controld.*: Input I_TERMINATE .*from do_recover",
            r"pacemaker-controld.*: Could not recover from internal error",
            # this status number is likely wrong now
            r"pacemaker-controld.*exited with status 2",
        ],
        badnews_ignore=shared_ignore)

    controld_proc = Process(
        self, "pacemaker-controld",
        triggersreboot=self.fastfail,
        pats=[
            "State transition .* S_IDLE",
            "State transition S_STARTING -> S_PENDING",
        ],
        badnews_ignore=shared_ignore)

    schedulerd_proc = Process(
        self, "pacemaker-schedulerd",
        triggersreboot=self.fastfail,
        pats=[
            "State transition .* S_RECOVERY",
            r"pacemaker-controld.*: Input I_TERMINATE .*from do_recover",
            r"pacemaker-controld.*: Could not recover from internal error",
            r"pacemaker-controld.*CRIT.*: Connection to the scheduler failed",
            "pacemaker-controld.*I_ERROR.*save_cib_contents",
            # this status number is likely wrong now
            r"pacemaker-controld.*exited with status 2",
        ],
        badnews_ignore=shared_ignore,
        dc_only=1)

    result = []

    if self.Env["DoFencing"] == 1:
        stoniths_proc = Process(
            self, "stoniths",
            triggersreboot=self.fastfail,
            dc_pats=[
                r"pacemaker-controld.*CRIT.*: Fencing daemon connection failed",
                "Attempting connection to fencing daemon",
            ],
            badnews_ignore=fencing_ignore)
        result.append(stoniths_proc)

    if self.fastfail == 0:
        # these status numbers are likely wrong now
        ccm_proc.pats.extend([
            r"attrd.*exited with status 1",
            r"pacemaker-(based|controld).*exited with status 2",
        ])
        based_proc.pats.extend([
            r"attrd.*exited with status 1",
            r"pacemaker-controld.*exited with status 2",
        ])
        execd_proc.pats.extend([
            r"pacemaker-controld.*exited with status 2",
        ])

    result.extend(
        [ccm_proc, based_proc, execd_proc, controld_proc, schedulerd_proc])
    return result
def ais_components(self):
    """Rebuild ``self.complist`` from hard-coded daemon patterns and return it.

    Each call resets ``self.complist`` and ``self.common_ignore``, creates one
    Process per daemon (cib, lrmd, crmd, attrd, pengine, stonith-ng) and then
    copies them into ``self.complist``, skipping anything being profiled by
    valgrind (when valgrind tests are enabled) and skipping ``stonith-ng``
    when fencing is disabled.
    """
    components = {}
    self.complist = []
    self.common_ignore = [
        "Pending action:",
        "(ERROR|error): crm_log_message_adv:",
        "(ERROR|error): MSG: No message to dump",
        "pending LRM operations at shutdown",
        "Lost connection to the CIB service",
        "Connection to the CIB terminated...",
        "Sending message to CIB service FAILED",
        "apply_xml_diff: Diff application failed!",
        "crmd.*Action A_RECOVER .* not supported",
        "unconfirmed_actions: Waiting on .* unconfirmed actions",
        "cib_native_msgready: Message pending on command channel",
        "crmd.*do_exit: Performing A_EXIT_1 - forcefully exiting the CRMd",
        "verify_stopped: Resource .* was active at shutdown. You may ignore this error if it is unmanaged.",
        "(ERROR|error): attrd_connection_destroy: Lost connection to attrd",
        "info: te_fence_node: Executing .* fencing operation",
    ]

    components["cib"] = Process(
        self, "cib",
        pats=[
            "State transition .* S_RECOVERY",
            "Respawning .* crmd",
            "Respawning .* attrd",
            "error: crm_ipc_read: Connection to cib_.* failed",
            "error: mainloop_gio_callback: Connection to cib_.* closed",
            "Connection to the CIB terminated...",
            "Child process crmd exited .* rc=2",
            "Child process attrd exited .* rc=1",
            "crmd.*Input I_TERMINATE from do_recover",
            "crmd.*I_ERROR.*crmd_cib_connection_destroy",
            "crmd.*do_exit: Could not recover from internal error",
        ],
        badnews_ignore=self.common_ignore)

    components["lrmd"] = Process(
        self, "lrmd",
        pats=[
            "State transition .* S_RECOVERY",
            "LRM Connection failed",
            "Respawning .* crmd",
            "error: crm_ipc_read: Connection to lrmd failed",
            "error: mainloop_gio_callback: Connection to lrmd.* closed",
            "crmd.*I_ERROR.*lrm_connection_destroy",
            "Child process crmd exited .* rc=2",
            "crmd.*Input I_TERMINATE from do_recover",
            "crmd.*do_exit: Could not recover from internal error",
        ],
        badnews_ignore=self.common_ignore)

    components["crmd"] = Process(
        self, "crmd",
        pats=[
            "State transition .* -> S_IDLE",
        ],
        badnews_ignore=self.common_ignore)

    components["attrd"] = Process(
        self, "attrd",
        pats=[],
        badnews_ignore=self.common_ignore)

    # pengine is given dc_pats rather than pats
    components["pengine"] = Process(
        self, "pengine",
        dc_pats=[
            "State transition .* S_RECOVERY",
            "Respawning .* crmd",
            "Child process crmd exited .* rc=2",
            "crm_ipc_read: Connection to pengine failed",
            "error: mainloop_gio_callback: Connection to pengine.* closed",
            "crit: pe_ipc_destroy: Connection to the Policy Engine failed",
            "crmd.*I_ERROR.*save_cib_contents",
            "crmd.*Input I_TERMINATE from do_recover",
            "crmd.*do_exit: Could not recover from internal error",
        ],
        badnews_ignore=self.common_ignore)

    # Fencing-specific ignores followed by a copy of the common ones.
    fencing_ignore = [
        "LogActions: Recover Fencing",
        "update_failcount: Updating failcount for Fencing",
        "(ERROR|error): te_connect_stonith: Sign-in failed: triggered a retry",
        "stonith_connection_failed: STONITH connection failed, finalizing .* pending operations.",
        "process_lrm_event: LRM operation Fencing.* Error",
    ] + self.common_ignore

    # component "stonith-ng" runs as process "stonithd"
    components["stonith-ng"] = Process(
        self, "stonith-ng", process="stonithd",
        pats=[
            "crm_ipc_read: Connection to stonith-ng failed",
            "stonith_connection_destroy_cb: LRMD lost STONITH connection",
            "mainloop_gio_callback: Connection to stonith-ng.* closed",
            "tengine_stonith_connection_destroy: Fencing daemon connection failed",
            "crmd.*stonith_api_add_notification: Callback already present",
        ],
        badnews_ignore=fencing_ignore)

    profiled = self.Env["valgrind-procs"].split()
    for name, component in components.items():
        if self.Env["valgrind-tests"] and name in profiled:
            # Processes running under valgrind can't be shot with
            # "killall -9 processname"
            self.log(
                "Filtering %s from the component list as it is being profiled by valgrind"
                % name)
            continue
        if name == "stonith-ng" and not self.Env["DoFencing"]:
            continue
        self.complist.append(component)

    return self.complist