Beispiel #1
0
    def Components(self):
        """Build (on first use) and return the list of component Process objects.

        ``self.fullcomplist`` is filled from ``self.templates`` only while it
        is still empty. The returned list omits every component named in
        ``Env["valgrind-procs"]`` when ``Env["valgrind-tests"]`` is set, and
        omits "stonith-ng" when ``Env["DoFencing"]`` is false.
        """

        def template(key):
            # All pattern lists are looked up under this object's name.
            return self.templates.get_component(self.name, key)

        if not self.fullcomplist:
            for daemon in ("cib", "lrmd", "crmd", "attrd"):
                self.fullcomplist[daemon] = Process(
                    self,
                    daemon,
                    pats=template(daemon),
                    badnews_ignore=template("%s-ignore" % daemon),
                    common_ignore=template("common-ignore"))

            # pengine is matched through dc_pats rather than pats
            self.fullcomplist["pengine"] = Process(
                self,
                "pengine",
                dc_pats=template("pengine"),
                badnews_ignore=template("pengine-ignore"),
                common_ignore=template("common-ignore"))

            # stonith-ng runs as the "stonithd" process, and its templates
            # are stored under the shorter "stonith" key
            self.fullcomplist["stonith-ng"] = Process(
                self,
                "stonith-ng",
                process="stonithd",
                pats=template("stonith"),
                badnews_ignore=template("stonith-ignore"),
                common_ignore=template("common-ignore"))

            # add (or replace) the corosync entry
            self.fullcomplist["corosync"] = Process(
                self,
                "corosync",
                pats=template("corosync"),
                badnews_ignore=template("corosync-ignore"),
                common_ignore=template("common-ignore"))

        # Processes running under valgrind can't be shot with
        # "killall -9 processname", so they are left out of the result.
        valgrind_procs = self.Env["valgrind-procs"].split()
        selected = []
        for name in list(self.fullcomplist):
            if self.Env["valgrind-tests"] and name in valgrind_procs:
                self.log(
                    "Filtering %s from the component list as it is being profiled by valgrind"
                    % name)
                continue
            if name == "stonith-ng" and not self.Env["DoFencing"]:
                continue
            selected.append(self.fullcomplist[name])

        return selected
Beispiel #2
0
    def Components(self):
        """Build (on first use) and return the list of component Process objects.

        ``self.fullcomplist`` is filled from ``self.templates`` only while it
        is still empty. The returned list omits every component named in
        ``Env["valgrind-procs"]`` when ``Env["valgrind-tests"]`` is set, and
        omits "pacemaker-fenced" when ``Env["DoFencing"]`` is false.
        """

        def template(key):
            # All pattern lists are looked up under this object's name.
            return self.templates.get_component(self.name, key)

        if not self.fullcomplist:
            daemons = (
                "pacemaker-based",
                "pacemaker-controld",
                "pacemaker-attrd",
                "pacemaker-execd",
                "pacemaker-fenced",
            )
            for daemon in daemons:
                self.fullcomplist[daemon] = Process(
                    self,
                    daemon,
                    pats=template(daemon),
                    badnews_ignore=template("%s-ignore" % daemon),
                    common_ignore=template("common-ignore"))

            # the scheduler is matched through dc_pats rather than pats
            self.fullcomplist["pacemaker-schedulerd"] = Process(
                self,
                "pacemaker-schedulerd",
                dc_pats=template("pacemaker-schedulerd"),
                badnews_ignore=template("pacemaker-schedulerd-ignore"),
                common_ignore=template("common-ignore"))

            # add (or replace) the corosync entry
            self.fullcomplist["corosync"] = Process(
                self,
                "corosync",
                pats=template("corosync"),
                badnews_ignore=template("corosync-ignore"),
                common_ignore=template("common-ignore"))

        # Processes running under valgrind can't be shot with
        # "killall -9 processname", so they are left out of the result.
        valgrind_procs = self.Env["valgrind-procs"].split()
        selected = []
        for name in list(self.fullcomplist):
            if self.Env["valgrind-tests"] and name in valgrind_procs:
                self.log(
                    "Filtering %s from the component list as it is being profiled by valgrind"
                    % name)
                continue
            if name == "pacemaker-fenced" and not self.Env["DoFencing"]:
                continue
            selected.append(self.fullcomplist[name])

        return selected
Beispiel #3
0
    def ais_components(self):
        """Build (on first use) and return the list of component Process objects.

        ``self.fullcomplist`` is filled from ``self.templates`` only while it
        is still empty. The returned list omits every component named in
        ``Env["valgrind-procs"]`` when ``Env["valgrind-tests"]`` is set, and
        omits "stonith-ng" when ``Env["DoFencing"]`` is false.
        """
        complist = []
        if not self.fullcomplist:
            for c in ["cib", "lrmd", "crmd", "attrd"]:
                self.fullcomplist[c] = Process(
                    self,
                    c,
                    pats=self.templates.get_component(self.name, c),
                    badnews_ignore=self.templates.get_component(
                        self.name, "%s-ignore" % c),
                    common_ignore=self.templates.get_component(
                        self.name, "common-ignore"))

            # The two entries below do not depend on the loop variable, so
            # they are built once here instead of once per loop iteration.

            # pengine uses dc_pats instead of pats
            self.fullcomplist["pengine"] = Process(
                self,
                "pengine",
                dc_pats=self.templates.get_component(self.name, "pengine"),
                badnews_ignore=self.templates.get_component(
                    self.name, "pengine-ignore"),
                common_ignore=self.templates.get_component(
                    self.name, "common-ignore"))

            # stonith-ng's process name is different from its component name
            self.fullcomplist["stonith-ng"] = Process(
                self,
                "stonith-ng",
                process="stonithd",
                pats=self.templates.get_component(self.name, "stonith"),
                badnews_ignore=self.templates.get_component(
                    self.name, "stonith-ignore"),
                common_ignore=self.templates.get_component(
                    self.name, "common-ignore"))

        vgrind = self.Env["valgrind-procs"].split()
        for key in self.fullcomplist:
            if self.Env["valgrind-tests"] and key in vgrind:
                # Processes running under valgrind can't be shot with
                # "killall -9 processname"
                self.log(
                    "Filtering %s from the component list as it is being profiled by valgrind"
                    % key)
                continue
            if key == "stonith-ng" and not self.Env["DoFencing"]:
                continue

            complist.append(self.fullcomplist[key])

        return complist
Beispiel #4
0
    def Components(self):
        """Return the component list with an "aisexec" Process appended.

        Calls ``self.ais_components()`` first, then appends an "aisexec"
        Process to ``self.complist``. The ignore list for that Process is the
        aisexec-specific patterns below followed by ``self.common_ignore``.

        Returns:
            ``self.complist``, including the appended "aisexec" entry.
        """
        self.ais_components()

        aisexec_ignore = [
            "(ERROR|error): ais_dispatch: Receiving message .* failed",
            "crmd.*I_ERROR.*crmd_cib_connection_destroy",
            "cib.*(ERROR|error): cib_ais_destroy: AIS connection terminated",
            #"crmd.*(ERROR|error): crm_ais_destroy: AIS connection terminated",
            "crmd.*do_exit: Could not recover from internal error",
            "crmd.*I_TERMINATE.*do_recover",
            "attrd.*attrd_ais_destroy: Lost connection to OpenAIS service!",
            "stonithd.*(ERROR|error): AIS connection terminated",
        ]

        aisexec_ignore.extend(self.common_ignore)

        self.complist.append(
            Process(
                self,
                "aisexec",
                pats=[
                    "(ERROR|error): ais_dispatch: AIS connection failed",
                    "crmd.*(ERROR|error): do_exit: Could not recover from internal error",
                    "pengine.*Scheduling Node .* for STONITH",
                    "stonithd.*requests a STONITH operation RESET on node",
                    "stonithd.*Succeeded to STONITH the node",
                ],
                badnews_ignore=aisexec_ignore))

        # Hand the list back to the caller instead of implicitly returning None.
        return self.complist
Beispiel #5
0
 def Components(self):
     """Return ais_components() called with an extra "corosync" Process.

     The corosync Process takes its patterns and ignore lists from
     ``self.templates`` and is passed to ``ais_components`` in a dict
     keyed by component name.
     """
     corosync = Process(
         self,
         "corosync",
         pats=self.templates.get_component(self.name, "corosync"),
         badnews_ignore=self.templates.get_component(
             self.name, "corosync-ignore"),
         common_ignore=self.templates.get_component(
             self.name, "common-ignore"))
     return self.ais_components(extra={"corosync": corosync})
Beispiel #6
0
    def Components(self):
        """Return the component list with a "corosync" Process appended.

        Calls ``self.ais_components()`` first, then appends a "corosync"
        Process to ``self.complist`` and returns that list.
        """
        self.ais_components()

        # Log lines passed as badnews_ignore for the corosync Process.
        # NOTE(review): a few entries are exact or near duplicates of each
        # other; they are kept as they were.
        ignored_errors = [
            r"error: crmd_quorum_destroy: connection terminated",
            r"error: lrm_state_verify_stopped: .* resources were active at lrm disconnect",

            r"pacemakerd.*error: pcmk_child_exit: Child process .* exited",
            r"error: send_cpg_message: Sending message via cpg FAILED",
            r"error: verify_stopped: Resource .* was active at shutdown.  You may ignore this error if it is unmanaged.",
            r"error: pcmk_cpg_dispatch: Connection to the CPG API failed:",
            r"error: pcmk_cman_dispatch: Connection to cman failed: -1",
            r"error: crm_ipc_read: Connection to cib_shm failed",
            r"error: mainloop_gio_callback: Connection to .* closed",
            r"crmd_cib_connection_destroys:",
            r"crmd.*error: do_recover: Action A_RECOVER .* not supported",
            r"error: do_log: FSA: Input I_TERMINATE.*do_recover",
            r"error: cib_ais_destroy: Corosync connection lost!  Exiting.",
            r"attrd.*error: attrd_cib_connection_destroy: Connection to the CIB terminated...",
            r"error: send_ais_text: Sending message .* via cpg: FAILED",

            r"error: crmd_quorum_destroy: connection terminated",
            r"error: lrm_state_verify_stopped: .* resources were active at lrm disconnect.",

            r"error: crm_ipc_read: Connection to stonith-ng failed",
            r"crit: tengine_stonith_connection_destroy: Fencing daemon connection failed",
            r"error: stonith_connection_destroy_cb: LRMD lost STONITH connection",
            r"error: stonith_connection_failed: STONITH connection failed",
            r"error: te_connect_stonith: Sign-in failed: triggered a retry",
            r"error: process_lrm_event: LRM operation Fencing.*",
            r"error: do_log: FSA: Input I_ERROR from crmd_cib_connection_destroy.* received in state",
            r"error: do_log: FSA: Input I_ERROR from do_shutdown_req.* received in state",
            r"do_state_transition: State transition .* S_RECOVERY .*origin=crmd_cib_connection_destroy",
            r"do_state_transition: State transition .* S_RECOVERY .*origin=do_shutdown_req",

            r"crmd.*error: cib_native_perform_op_delegate: Couldn't perform cib_slave operation",
            r"crmd.*error: cib_native_perform_op_delegate: CIB disconnected",
        ]

        # Log lines passed as pats for the corosync Process.
        expected_patterns = [
            r"pacemakerd.*error: cfg_connection_destroy: Connection destroyed",
            r"pacemakerd.*error: cpg_connection_destroy: Connection destroyed",
            r"attrd_ais_destroy: Lost connection to Corosync service!",
            r"stonith_peer_ais_destroy: AIS connection terminated",
            r"cib_ais_destroy: Corosync connection lost!  Exiting.",
            r"crmd_ais_destroy: connection terminated",
            r"error: lrm_state_verify_stopped: .* resources were active at lrm disconnect",
            r"pengine.*Scheduling Node .* for STONITH",
            r"log_operation: Operation .* for host .* with device .* returned: 0",
            r"tengine_stonith_notify: Peer .* was terminated .*: OK",
        ]

        corosync = Process(
            self,
            "corosync",
            pats=expected_patterns,
            badnews_ignore=ignored_errors,
            common_ignore=self.common_ignore)
        self.complist.append(corosync)

        return self.complist
Beispiel #7
0
    def Components(self):
        """Register a "corosync" Process and return ais_components().

        ``ais_components()`` is called twice: once before the corosync entry
        is stored in ``self.fullcomplist`` (result discarded) and once after,
        whose result is returned.
        """
        # presumably ensures the base component table exists before the
        # corosync entry is added -- confirm against ais_components()
        self.ais_components()

        name = "corosync"
        patterns = self.templates.get_component(self.name, name)
        ignored = self.templates.get_component(self.name, "%s-ignore" % name)
        shared_ignored = self.templates.get_component(self.name,
                                                      "common-ignore")
        self.fullcomplist[name] = Process(
            self,
            name,
            pats=patterns,
            badnews_ignore=ignored,
            common_ignore=shared_ignored)

        return self.ais_components()
Beispiel #8
0
    def Components(self):
        """Return the component list with a "corosync" Process appended.

        Calls ``self.ais_components()`` first, then appends a "corosync"
        Process to ``self.complist`` and returns that list.
        """
        self.ais_components()

        # Log lines passed as badnews_ignore for the corosync Process.
        ignored_errors = [
            r"error: pcmk_cpg_dispatch: Connection to the CPG API failed: Library error",
            r"pacemakerd.*error: pcmk_child_exit: Child process .* exited",
            r"cib.*error: cib_cs_destroy: Corosync connection lost",
            r"attrd.*error: attrd_cib_connection_destroy: Connection to the CIB terminated",
            r"stonith-ng.*error: stonith_peer_cs_destroy: Corosync connection terminated",
            r"error: pcmk_child_exit: Child process cib .* exited: Invalid argument",
            r"error: pcmk_child_exit: Child process attrd .* exited: Transport endpoint is not connected",
            r"error: pcmk_child_exit: Child process crmd .* exited: Link has been severed",
            r"lrmd.*error: crm_ipc_read: Connection to stonith-ng failed",
            r"lrmd.*error: mainloop_gio_callback: Connection to stonith-ng.* closed",
            r"lrmd.*error: stonith_connection_destroy_cb: LRMD lost STONITH connection",
            r"crmd.*do_state_transition: State transition .* S_RECOVERY",
            r"crmd.*error: do_log: FSA: Input I_ERROR",
            r"crmd.*error: do_log: FSA: Input I_TERMINATE",
            r"crmd.*error: pcmk_cman_dispatch: Connection to cman failed",
            r"crmd.*error: crmd_fast_exit: Could not recover from internal error",
            r"error: crm_ipc_read: Connection to cib_shm failed",
            r"error: mainloop_gio_callback: Connection to cib_shm.* closed",
        ]

        # Log lines passed as pats for the corosync Process.
        expected_patterns = [
            r"pacemakerd.*error: cfg_connection_destroy: Connection destroyed",
            r"pacemakerd.*error: mcp_cpg_destroy: Connection destroyed",
            r"attrd_cs_destroy: Lost connection to Corosync service!",
            r"stonith_peer_cs_destroy: Corosync connection terminated",
            r"cib_cs_destroy: Corosync connection lost!  Exiting.",
            r"crmd_(cs|quorum)_destroy: connection terminated",
            r"pengine.*Scheduling Node .* for STONITH",
            r"tengine_stonith_notify: Peer .* was terminated .*: OK",
        ]

        corosync = Process(
            self,
            "corosync",
            pats=expected_patterns,
            badnews_ignore=ignored_errors,
            common_ignore=self.common_ignore)
        self.complist.append(corosync)

        return self.complist
Beispiel #9
0
    def Components(self):
        """Construct and return the list of component Process objects.

        The list holds, in order: a "stoniths" Process (only when
        ``Env["DoFencing"] == 1``), then "ccm", "pacemaker-based",
        "pacemaker-execd", "pacemaker-controld" and "pacemaker-schedulerd".
        Every Process gets ``triggersreboot=self.fastfail``. When
        ``self.fastfail == 0`` extra exit-status patterns are added to the
        ccm, based and execd pattern lists.
        """
        # Patterns that occur in more than one component's list.
        recovery = "State transition .* S_RECOVERY"
        starting_pending = "State transition S_STARTING -> S_PENDING"
        terminate = r"pacemaker-controld.*: Input I_TERMINATE .*from do_recover"
        no_recover = r"pacemaker-controld.*: Could not recover from internal error"
        cib_destroy = "pacemaker-controld.*I_ERROR.*crmd_cib_connection_destroy"
        # these status numbers are likely wrong now
        controld_exit2 = r"pacemaker-controld.*exited with status 2"
        attrd_exit1 = r"attrd.*exited with status 1"

        shared_ignore = [
            "Pending action:",
            "(ERROR|error): crm_log_message_adv:",
            "(ERROR|error): MSG: No message to dump",
            "pending LRM operations at shutdown",
            "Lost connection to the CIB manager",
            "Connection to the CIB terminated...",
            "Sending message to the CIB manager FAILED",
            "Action A_RECOVER .* not supported",
            "(ERROR|error): stonithd_op_result_ready: not signed on",
            "pingd.*(ERROR|error): send_update: Could not send update",
            "send_ipc_message: IPC Channel to .* is not connected",
            "unconfirmed_actions: Waiting on .* unconfirmed actions",
            "cib_native_msgready: Message pending on command channel",
            r": Performing A_EXIT_1 - forcefully exiting ",
            r"Resource .* was active at shutdown.  You may ignore this error if it is unmanaged.",
        ]

        # Fencing-specific ignores, followed by a copy of the shared ones.
        fencing_ignore = [
            r"Updating failcount for child_DoFencing",
            r"error.*: Fencer connection failed \(will retry\)",
            "pacemaker-execd.*(ERROR|error): stonithd_receive_ops_result failed.",
        ] + shared_ignore

        # Patterns that used to be in the ccm list but are disabled
        # ("Not if it was fenced"):
        #   "A new node joined the cluster"
        #   "WARN: determine_online_status: Node .* is unclean"
        #   "Scheduling Node .* for STONITH"
        #   "Executing .* fencing operation"
        #   "tengine_stonith_callback: .*result=0"
        #   "Processing I_NODE_JOIN:.* cause=C_HA_MESSAGE"
        #   "State transition S_.* -> S_INTEGRATION.*input=I_NODE_JOIN"
        ccm_proc = Process(
            self,
            "ccm",
            triggersreboot=self.fastfail,
            pats=[
                recovery,
                "pacemaker-controld.*Action A_RECOVER .* not supported",
                terminate,
                no_recover,
                cib_destroy,
                controld_exit2,
                attrd_exit1,
                r"cib.*exited with status 2",
                starting_pending,
            ],
            badnews_ignore=shared_ignore)

        based_proc = Process(
            self,
            "pacemaker-based",
            triggersreboot=self.fastfail,
            pats=[
                recovery,
                "Lost connection to the CIB manager",
                "Connection to the CIB manager terminated",
                terminate,
                cib_destroy,
                no_recover,
                controld_exit2,
                attrd_exit1,
            ],
            badnews_ignore=shared_ignore)

        execd_proc = Process(
            self,
            "pacemaker-execd",
            triggersreboot=self.fastfail,
            pats=[
                recovery,
                "LRM Connection failed",
                "pacemaker-controld.*I_ERROR.*lrm_connection_destroy",
                starting_pending,
                terminate,
                no_recover,
                controld_exit2,
            ],
            badnews_ignore=shared_ignore)

        # Disabled controld patterns:
        #   "WARN: determine_online_status: Node .* is unclean"
        #   "Scheduling Node .* for STONITH"
        #   "Executing .* fencing operation"
        #   "tengine_stonith_callback: .*result=0"
        controld_proc = Process(
            self,
            "pacemaker-controld",
            triggersreboot=self.fastfail,
            pats=[
                "State transition .* S_IDLE",
                starting_pending,
            ],
            badnews_ignore=shared_ignore)

        schedulerd_proc = Process(
            self,
            "pacemaker-schedulerd",
            triggersreboot=self.fastfail,
            pats=[
                recovery,
                terminate,
                no_recover,
                r"pacemaker-controld.*CRIT.*: Connection to the scheduler failed",
                "pacemaker-controld.*I_ERROR.*save_cib_contents",
                controld_exit2,
            ],
            badnews_ignore=shared_ignore,
            dc_only=1)

        components = []
        if self.Env["DoFencing"] == 1:
            fencer_proc = Process(
                self,
                "stoniths",
                triggersreboot=self.fastfail,
                dc_pats=[
                    r"pacemaker-controld.*CRIT.*: Fencing daemon connection failed",
                    "Attempting connection to fencing daemon",
                ],
                badnews_ignore=fencing_ignore)
            components.append(fencer_proc)

        if self.fastfail == 0:
            # these status numbers are likely wrong now
            ccm_proc.pats.extend([
                attrd_exit1,
                r"pacemaker-(based|controld).*exited with status 2",
            ])
            based_proc.pats.extend([attrd_exit1, controld_exit2])
            execd_proc.pats.extend([controld_exit2])

        components.extend([
            ccm_proc,
            based_proc,
            execd_proc,
            controld_proc,
            schedulerd_proc,
        ])

        return components
Beispiel #10
0
    def ais_components(self):
        """Rebuild ``self.complist`` and ``self.common_ignore``; return the list.

        A Process is created for each of cib, lrmd, crmd, attrd, pengine and
        stonith-ng. A component is left out of ``self.complist`` when it is
        named in ``Env["valgrind-procs"]`` while ``Env["valgrind-tests"]`` is
        set, and stonith-ng is left out when ``Env["DoFencing"]`` is false.
        """
        self.complist = []
        self.common_ignore = [
            "Pending action:",
            "(ERROR|error): crm_log_message_adv:",
            "(ERROR|error): MSG: No message to dump",
            "pending LRM operations at shutdown",
            "Lost connection to the CIB service",
            "Connection to the CIB terminated...",
            "Sending message to CIB service FAILED",
            "apply_xml_diff: Diff application failed!",
            "crmd.*Action A_RECOVER .* not supported",
            "unconfirmed_actions: Waiting on .* unconfirmed actions",
            "cib_native_msgready: Message pending on command channel",
            "crmd.*do_exit: Performing A_EXIT_1 - forcefully exiting the CRMd",
            "verify_stopped: Resource .* was active at shutdown.  You may ignore this error if it is unmanaged.",
            "(ERROR|error): attrd_connection_destroy: Lost connection to attrd",
            "info: te_fence_node: Executing .* fencing operation",
            # Disabled:
            #   "error: native_create_actions: Resource .*stonith::.* is active on 2 nodes attempting recovery"
            #   "error: process_pe_message: Transition .* ERRORs found during PE processing"
        ]

        # Patterns that occur in more than one component's list.
        recovery = "State transition .* S_RECOVERY"
        respawn_crmd = "Respawning .* crmd"
        crmd_exit2 = "Child process crmd exited .* rc=2"
        terminate = "crmd.*Input I_TERMINATE from do_recover"
        no_recover = "crmd.*do_exit: Could not recover from internal error"

        components = {}

        components["cib"] = Process(
            self,
            "cib",
            pats=[
                recovery,
                respawn_crmd,
                "Respawning .* attrd",
                "error: crm_ipc_read: Connection to cib_.* failed",
                "error: mainloop_gio_callback: Connection to cib_.* closed",
                "Connection to the CIB terminated...",
                crmd_exit2,
                "Child process attrd exited .* rc=1",
                terminate,
                "crmd.*I_ERROR.*crmd_cib_connection_destroy",
                no_recover,
            ],
            badnews_ignore=self.common_ignore)

        components["lrmd"] = Process(
            self,
            "lrmd",
            pats=[
                recovery,
                "LRM Connection failed",
                respawn_crmd,
                "error: crm_ipc_read: Connection to lrmd failed",
                "error: mainloop_gio_callback: Connection to lrmd.* closed",
                "crmd.*I_ERROR.*lrm_connection_destroy",
                crmd_exit2,
                terminate,
                no_recover,
            ],
            badnews_ignore=self.common_ignore)

        # Disabled crmd patterns:
        #   "WARN: determine_online_status: Node .* is unclean"
        #   "Scheduling Node .* for STONITH"
        #   "Executing .* fencing operation"
        #   Only if the node wasn't the DC:  "State transition S_IDLE"
        components["crmd"] = Process(
            self,
            "crmd",
            pats=["State transition .* -> S_IDLE"],
            badnews_ignore=self.common_ignore)

        components["attrd"] = Process(
            self,
            "attrd",
            pats=[],
            badnews_ignore=self.common_ignore)

        components["pengine"] = Process(
            self,
            "pengine",
            dc_pats=[
                recovery,
                respawn_crmd,
                crmd_exit2,
                "crm_ipc_read: Connection to pengine failed",
                "error: mainloop_gio_callback: Connection to pengine.* closed",
                "crit: pe_ipc_destroy: Connection to the Policy Engine failed",
                "crmd.*I_ERROR.*save_cib_contents",
                terminate,
                no_recover,
            ],
            badnews_ignore=self.common_ignore)

        # Fencing-specific ignores, followed by a copy of the common ones.
        fencing_ignore = [
            "LogActions: Recover Fencing",
            "update_failcount: Updating failcount for Fencing",
            "(ERROR|error): te_connect_stonith: Sign-in failed: triggered a retry",
            "stonith_connection_failed: STONITH connection failed, finalizing .* pending operations.",
            "process_lrm_event: LRM operation Fencing.* Error",
        ] + self.common_ignore

        components["stonith-ng"] = Process(
            self,
            "stonith-ng",
            process="stonithd",
            pats=[
                "crm_ipc_read: Connection to stonith-ng failed",
                "stonith_connection_destroy_cb: LRMD lost STONITH connection",
                "mainloop_gio_callback: Connection to stonith-ng.* closed",
                "tengine_stonith_connection_destroy: Fencing daemon connection failed",
                "crmd.*stonith_api_add_notification: Callback already present",
            ],
            badnews_ignore=fencing_ignore)

        valgrind_procs = self.Env["valgrind-procs"].split()
        for name in components.keys():
            if self.Env["valgrind-tests"] and name in valgrind_procs:
                # Processes running under valgrind can't be shot with
                # "killall -9 processname"
                self.log(
                    "Filtering %s from the component list as it is being profiled by valgrind"
                    % name)
                continue
            if name == "stonith-ng" and not self.Env["DoFencing"]:
                continue

            self.complist.append(components[name])

        return self.complist