Ejemplo n.º 1
0
    def partition_stable(self, nodes, timeout=None):
        """Wait for the partition described by `nodes` to report an idle state.

        `nodes` is a whitespace-separated string of node names.  `timeout` is
        handed to the LogWatcher and defaults to self.Env["DeadTime"].

        Returns 1 as soon as the watcher yields a line that matches one of the
        node names (each name is used as a regular expression), or immediately
        when `nodes` is shorter than three characters.  Returns None once the
        watcher stops yielding lines.
        """
        watchpats = [
            "Current ping state: S_IDLE",
            self.templates["Pat:DC_IDLE"],
        ]
        self.debug("Waiting for cluster stability...")

        if timeout is None:
            timeout = self.Env["DeadTime"]

        # NOTE: `nodes` is a string, so this compares its character count,
        # not the number of nodes it names.
        if len(nodes) < 3:
            self.debug("Cluster is inactive")
            return 1

        # Split once; the same names are needed for the watcher, the status
        # commands and every pass of the polling loop below.
        node_list = nodes.split()

        idle_watch = LogWatcher(self.Env["LogFileName"], watchpats, "ClusterStable", timeout, hosts=node_list, kind=self.Env["LogWatcher"])
        idle_watch.setwatch()

        for node in node_list:
            # have each node dump its current state
            self.rsh(node, self.templates["StatusCmd"] % node, 1)

        ret = idle_watch.look()
        while ret:
            self.debug(ret)
            if any(re.search(node, ret) for node in node_list):
                return 1
            ret = idle_watch.look()

        self.debug("Warn: Partition %s not IDLE after %ds" % (repr(nodes), timeout))
        return None
Ejemplo n.º 2
0
    def partition_stable(self, nodes, timeout=None):
        """Wait for the partition described by `nodes` to report an idle state.

        `nodes` is a whitespace-separated string of node names.  `timeout` is
        handed to the LogWatcher and defaults to self.Env["DeadTime"].

        Returns 1 as soon as the watcher yields a line that matches one of the
        node names (each name is used as a regular expression), or immediately
        when `nodes` is shorter than three characters.  Returns None once the
        watcher stops yielding lines.
        """
        watchpats = [
            "Current ping state: S_IDLE",
            self.templates["Pat:DC_IDLE"],
        ]
        self.debug("Waiting for cluster stability...")

        if timeout is None:
            timeout = self.Env["DeadTime"]

        # NOTE: `nodes` is a string, so this compares its character count,
        # not the number of nodes it names.
        if len(nodes) < 3:
            self.debug("Cluster is inactive")
            return 1

        # Split once; the same names are needed for the watcher, the status
        # commands and every pass of the polling loop below.
        node_list = nodes.split()

        idle_watch = LogWatcher(self.Env["LogFileName"], watchpats, "ClusterStable", timeout, hosts=node_list, kind=self.Env["LogWatcher"])
        idle_watch.setwatch()

        for node in node_list:
            # have each node dump its current state
            self.rsh(node, self.templates["StatusCmd"] % node, 1)

        ret = idle_watch.look()
        while ret:
            self.debug(ret)
            if any(re.search(node, ret) for node in node_list):
                return 1
            ret = idle_watch.look()

        self.debug("Warn: Partition %s not IDLE after %ds" % (repr(nodes), timeout))
        return None
Ejemplo n.º 3
0
    def test_node_CM(self, node):
        """Report the status of the cluster manager on a given node.

        Returns 0 when the status command output is empty or does not contain
        'ok', 2 when the node is up and stable, and 1 when it is up but no
        stable state was observed.  self.ShouldBeStatus[node] is updated to
        "down" or "up" accordingly, and a disagreement with its previous value
        is logged.
        """
        watchpats = [
            "Current ping state: (S_IDLE|S_NOT_DC)",
            self.templates["Pat:NonDC_started"] % node,
            self.templates["Pat:DC_started"] % node,
        ]
        idle_watch = LogWatcher(self.Env["LogFileName"], watchpats, "ClusterIdle",
                                hosts=[node], kind=self.Env["LogWatcher"])
        # Start watching before the status command is issued.
        idle_watch.setwatch()

        out = self.rsh(node, self.templates["StatusCmd"] % node, 1)
        self.debug("Node %s status: '%s'" % (node, out))

        believed = self.ShouldBeStatus[node]

        if not out or 'ok' not in out:
            if believed == "up":
                self.log(
                    "Node status for %s is %s but we think it should be %s" %
                    (node, "down", believed))
            self.ShouldBeStatus[node] = "down"
            return 0

        if believed == "down":
            self.log(
                "Node status for %s is %s but we think it should be %s: %s" %
                (node, "up", believed, out))
        self.ShouldBeStatus[node] = "up"

        # check the output first - because syslog-ng loses messages
        if 'S_NOT_DC' in out or 'S_IDLE' in out:
            # Up and stable
            return 2

        # fall back to syslog-ng and wait
        if idle_watch.look():
            # Up and stable
            return 2

        # just up
        self.debug("Warn: Node %s is unstable: %s" % (node, out))
        return 1
Ejemplo n.º 4
0
    def test_node_CM(self, node):
        """Report the status of the cluster manager on a given node.

        Returns 0 when the status command output is empty or does not contain
        'ok', 2 when the node is up and stable, and 1 when it is up but no
        stable state was observed.  self.ShouldBeStatus[node] is updated to
        "down" or "up" accordingly, and a disagreement with its previous value
        is logged.
        """
        watchpats = [
            "Current ping state: (S_IDLE|S_NOT_DC)",
            self.templates["Pat:Slave_started"] % node,
            self.templates["Pat:Master_started"] % node,
        ]
        idle_watch = LogWatcher(self.Env["LogFileName"], watchpats, "ClusterIdle", hosts=[node], kind=self.Env["LogWatcher"])
        # Start watching before the status command is issued.
        idle_watch.setwatch()

        out = self.rsh(node, self.templates["StatusCmd"] % node, 1)
        self.debug("Node %s status: '%s'" % (node, out))

        # String containment replaces the string.find() module function,
        # which no longer exists in Python 3.
        if not out or 'ok' not in out:
            if self.ShouldBeStatus[node] == "up":
                self.log(
                    "Node status for %s is %s but we think it should be %s"
                    % (node, "down", self.ShouldBeStatus[node]))
            self.ShouldBeStatus[node] = "down"
            return 0

        if self.ShouldBeStatus[node] == "down":
            self.log(
                "Node status for %s is %s but we think it should be %s: %s"
                % (node, "up", self.ShouldBeStatus[node], out))

        self.ShouldBeStatus[node] = "up"

        # check the output first - because syslog-ng loses messages
        if 'S_NOT_DC' in out or 'S_IDLE' in out:
            # Up and stable
            return 2

        # fall back to syslog-ng and wait
        if not idle_watch.look():
            # just up
            self.debug("Warn: Node %s is unstable: %s" % (node, out))
            return 1

        # Up and stable
        return 2
Ejemplo n.º 5
0
class Scenario:
    """An ordered list of ScenarioComponent objects plus audits and tests.

    Each ScenarioComponent is SetUp() in turn, and then after the tests have
    been run, they are torn down using TearDown() (in reverse order).

    A Scenario is applicable to a particular cluster manager iff each
    ScenarioComponent is applicable.

    A partially set up scenario is torn down if it fails during setup.
    """

    def __init__(self, ClusterManager, Components, Audits, Tests):
        """Initialize the Scenario from the list of ScenarioComponents.

        Raises ValueError when an entry of Components, Audits or Tests is not
        an instance of ScenarioComponent, ClusterAudit or CTSTest respectively.
        """
        self.ClusterManager = ClusterManager
        self.Components = Components
        self.Audits = Audits
        self.Tests = Tests

        self.BadNews = None  # replaced by a LogWatcher in SetUp()
        self.TestSets = []
        self.Stats = {"success": 0, "failure": 0, "BadNews": 0, "skipped": 0}
        self.Sets = []

        for comp in Components:
            if not isinstance(comp, ScenarioComponent):
                raise ValueError("Init value must be subclass of ScenarioComponent")

        for audit in Audits:
            if not isinstance(audit, ClusterAudit):
                raise ValueError("Init value must be subclass of ClusterAudit")

        for test in Tests:
            if not isinstance(test, CTSTest):
                raise ValueError("Init value must be a subclass of CTSTest")

    def IsApplicable(self):
        """Return 1 iff every ScenarioComponent IsApplicable(), else None."""
        for comp in self.Components:
            if not comp.IsApplicable():
                return None
        return 1

    def SetUp(self):
        """Set up the Scenario.  Return 1 on success, None on failure.

        When a component fails to set up, the components up to and including
        the failing one are torn down again before returning.
        """
        cm = self.ClusterManager

        cm.prepare()
        self.audit()  # Also detects remote/local log config
        cm.StatsMark(0)
        cm.ns.WaitForAllNodesToComeUp(cm.Env["nodes"])

        self.audit()
        if cm.Env["valgrind-tests"]:
            cm.install_helper("cts.supp")

        self.BadNews = LogWatcher(cm.Env["LogFileName"],
                                  cm.templates.get_patterns(cm.Env["Name"], "BadNews"),
                                  "BadNews", 0,
                                  kind=cm.Env["LogWatcher"],
                                  hosts=cm.Env["nodes"])
        # Call after we've figured out what type of log watching to do in LogAudit
        self.BadNews.setwatch()

        for j, comp in enumerate(self.Components):
            if not comp.SetUp(cm):
                # OOPS!  We failed.  Tear partial setups down.
                self.audit()
                cm.log("Tearing down partial setup")
                self.TearDown(j)
                return None

        self.audit()
        return 1

    def TearDown(self, max=None):
        """Tear down the Scenario - in reverse order.

        `max` is the index of the last component to tear down; by default
        every component is torn down.
        """
        if max is None:
            max = len(self.Components) - 1

        for j in range(max, -1, -1):
            self.Components[j].TearDown(self.ClusterManager)

        self.audit()
        self.ClusterManager.StatsExtract()

    def incr(self, name):
        """Increment (or initialize) the value associated with the given name."""
        self.Stats[name] = self.Stats.get(name, 0) + 1

    def run(self, Iterations):
        """Run run_loop(Iterations) between oprofileStart() and oprofileStop()."""
        self.ClusterManager.oprofileStart()
        try:
            self.run_loop(Iterations)
        finally:
            # Stop profiling exactly once, whether or not the loop raised.
            self.ClusterManager.oprofileStop()

    def run_loop(self, Iterations):
        """Abstract: subclasses provide the actual test loop."""
        raise ValueError("Abstract Class member (run_loop)")

    def _ask_continue(self, prompt):
        """Return the operator's answer to `prompt`.

        Returns "Y" without prompting when Env["continue"] is 1, and "n" when
        reading the answer raises EOFError.
        """
        if self.ClusterManager.Env["continue"] == 1:
            return "Y"
        try:
            return raw_input(prompt)
        except EOFError:
            return "n"

    def run_test(self, test, testcount):
        """Run `test` on a randomly chosen node and record the outcome.

        Returns 1 when the test was executed or counted as a failure, and 0
        otherwise.  Raises ValueError when teardown fails and the operator
        answers "n".
        """
        cm = self.ClusterManager
        nodechoice = cm.Env.RandomNode()

        ret = 1
        did_run = 0

        cm.StatsMark(testcount)
        cm.instance_errorstoignore_clear()
        cm.log(("Running test %s" % test.name).ljust(35) + (" (%s) " % nodechoice).ljust(15) + "[" + ("%d" % testcount).rjust(3) + "]")

        starttime = test.set_timer()
        if not test.setup(nodechoice):
            cm.log("Setup failed")
            ret = 0

        elif not test.canrunnow(nodechoice):
            # ret stays 1 here, so a skipped test is counted under "success".
            cm.log("Skipped")
            test.skipped()

        else:
            did_run = 1
            ret = test(nodechoice)

        if not test.teardown(nodechoice):
            cm.log("Teardown failed")
            if self._ask_continue('Continue? [nY]') == "n":
                raise ValueError("Teardown of %s on %s failed" % (test.name, nodechoice))
            ret = 0

        stoptime = time.time()
        cm.oprofileSave(testcount)

        elapsed_time = stoptime - starttime
        test_time = stoptime - test.get_timer()
        if not test["min_time"]:
            # First recorded run: seed all three timing statistics.
            test["elapsed_time"] = elapsed_time
            test["min_time"] = test_time
            test["max_time"] = test_time
        else:
            test["elapsed_time"] = test["elapsed_time"] + elapsed_time
            if test_time < test["min_time"]:
                test["min_time"] = test_time
            if test_time > test["max_time"]:
                test["max_time"] = test_time

        if ret:
            self.incr("success")
            test.log_timer()
        else:
            self.incr("failure")
            cm.statall()
            did_run = 1  # Force the test count to be incremented anyway so test extraction works

        self.audit(test.errorstoignore())
        return did_run

    def summarize(self):
        """Log the overall statistics followed by a per-test summary."""
        cm = self.ClusterManager
        cm.log("****************")
        cm.log("Overall Results:" + repr(self.Stats))
        cm.log("****************")

        # Only these per-test statistics appear in the summary lines.
        stat_filter = {
            "calls": 0,
            "failure": 0,
            "skipped": 0,
            "auditfail": 0,
        }
        cm.log("Test Summary")
        for test in self.Tests:
            for key in stat_filter:
                stat_filter[key] = test.Stats[key]
            cm.log(("Test %s: " % test.name).ljust(25) + " %s" % repr(stat_filter))

        cm.debug("Detailed Results")
        for test in self.Tests:
            cm.debug(("Test %s: " % test.name).ljust(25) + " %s" % repr(test.Stats))

        cm.log("<<<<<<<<<<<<<<<< TESTS COMPLETED")

    def audit(self, LocalIgnore=None):
        """Run every audit and drain the BadNews watcher.

        `LocalIgnore` is an optional list of extra regular expressions; a
        BadNews line matching any ignore pattern is not reported.  Returns the
        number of audits that failed.  Raises ValueError when 1000 BadNews
        lines have been reported and the operator answers "n".
        """
        ignorelist = ["CTS:"]
        if LocalIgnore is not None:
            ignorelist.extend(LocalIgnore)
        ignorelist.extend(self.ClusterManager.errorstoignore())
        ignorelist.extend(self.ClusterManager.instance_errorstoignore())

        # This makes sure everything is stabilized before starting...
        failed = 0
        for audit in self.Audits:
            if not audit():
                self.ClusterManager.log("Audit " + audit.name() + " FAILED.")
                failed += 1
            else:
                self.ClusterManager.debug("Audit " + audit.name() + " passed.")

        errcount = 0
        while errcount < 1000:
            match = self.BadNews.look(0) if self.BadNews else None
            if not match:
                break
            if any(re.search(ignore, match) for ignore in ignorelist):
                continue
            self.ClusterManager.log("BadNews: " + match)
            self.incr("BadNews")
            errcount += 1
        else:
            # Only reached when the loop ran out of budget, not on break.
            if self._ask_continue('Big problems. Continue? [nY]') == "n":
                self.ClusterManager.log("Shutting down.")
                self.summarize()
                self.TearDown()
                raise ValueError("Looks like we hit a BadNews jackpot!")

        if self.BadNews:
            self.BadNews.end()
        return failed
Ejemplo n.º 6
0
class Scenario:
    """An ordered list of ScenarioComponent objects plus audits and tests.

    Each ScenarioComponent is SetUp() in turn, and then after the tests have
    been run, they are torn down using TearDown() (in reverse order).

    A Scenario is applicable to a particular cluster manager iff each
    ScenarioComponent is applicable.

    A partially set up scenario is torn down if it fails during setup.
    """

    def __init__(self, ClusterManager, Components, Audits, Tests):
        """Initialize the Scenario from the list of ScenarioComponents.

        Raises ValueError when an entry of Components, Audits or Tests is not
        an instance of ScenarioComponent, ClusterAudit or CTSTest respectively.
        """
        self.ClusterManager = ClusterManager
        self.Components = Components
        self.Audits = Audits
        self.Tests = Tests

        self.BadNews = None  # replaced by a LogWatcher in SetUp()
        self.TestSets = []
        self.Stats = {"success": 0, "failure": 0, "BadNews": 0, "skipped": 0}
        self.Sets = []

        for comp in Components:
            if not isinstance(comp, ScenarioComponent):
                raise ValueError(
                    "Init value must be subclass of ScenarioComponent")

        for audit in Audits:
            if not isinstance(audit, ClusterAudit):
                raise ValueError("Init value must be subclass of ClusterAudit")

        for test in Tests:
            if not isinstance(test, CTSTest):
                raise ValueError("Init value must be a subclass of CTSTest")

    def IsApplicable(self):
        """Return 1 iff every ScenarioComponent IsApplicable(), else None."""
        for comp in self.Components:
            if not comp.IsApplicable():
                return None
        return 1

    def SetUp(self):
        """Set up the Scenario.  Return 1 on success, None on failure.

        When a component fails to set up, the components up to and including
        the failing one are torn down again before returning.
        """
        cm = self.ClusterManager

        cm.prepare()
        self.audit()  # Also detects remote/local log config
        cm.StatsMark(0)
        cm.ns.WaitForAllNodesToComeUp(cm.Env["nodes"])

        self.audit()
        if cm.Env["valgrind-tests"]:
            cm.install_helper("cts.supp")

        self.BadNews = LogWatcher(cm.Env["LogFileName"],
                                  cm.templates.get_patterns(
                                      cm.Env["Name"], "BadNews"),
                                  "BadNews",
                                  0,
                                  kind=cm.Env["LogWatcher"],
                                  hosts=cm.Env["nodes"])
        # Call after we've figured out what type of log watching to do in LogAudit
        self.BadNews.setwatch()

        for j, comp in enumerate(self.Components):
            if not comp.SetUp(cm):
                # OOPS!  We failed.  Tear partial setups down.
                self.audit()
                cm.log("Tearing down partial setup")
                self.TearDown(j)
                return None

        self.audit()
        return 1

    def TearDown(self, max=None):
        """Tear down the Scenario - in reverse order.

        `max` is the index of the last component to tear down; by default
        every component is torn down.
        """
        if max is None:
            max = len(self.Components) - 1

        for j in range(max, -1, -1):
            self.Components[j].TearDown(self.ClusterManager)

        self.audit()
        self.ClusterManager.StatsExtract()

    def incr(self, name):
        """Increment (or initialize) the value associated with the given name."""
        self.Stats[name] = self.Stats.get(name, 0) + 1

    def run(self, Iterations):
        """Run run_loop(Iterations) between oprofileStart() and oprofileStop()."""
        self.ClusterManager.oprofileStart()
        try:
            self.run_loop(Iterations)
        finally:
            # Stop profiling exactly once, whether or not the loop raised.
            self.ClusterManager.oprofileStop()

    def run_loop(self, Iterations):
        """Abstract: subclasses provide the actual test loop."""
        raise ValueError("Abstract Class member (run_loop)")

    def _ask_continue(self, prompt):
        """Return the operator's answer to `prompt`.

        Returns "n" when reading the answer raises EOFError, so that a closed
        standard input leads to the shutdown path rather than a raw EOFError.
        """
        try:
            return raw_input(prompt)
        except EOFError:
            return "n"

    def run_test(self, test, testcount):
        """Run `test` on a randomly chosen node and record the outcome.

        Returns 1 when the test was executed or counted as a failure, and 0
        otherwise.  Raises ValueError when teardown fails and the operator
        answers "n".
        """
        cm = self.ClusterManager
        nodechoice = cm.Env.RandomNode()

        ret = 1
        did_run = 0

        cm.StatsMark(testcount)
        cm.instance_errorstoignore_clear()
        cm.log(("Running test %s" % test.name).ljust(35) +
               (" (%s) " % nodechoice).ljust(15) + "[" +
               ("%d" % testcount).rjust(3) + "]")

        starttime = test.set_timer()
        if not test.setup(nodechoice):
            cm.log("Setup failed")
            ret = 0

        elif not test.canrunnow(nodechoice):
            # ret stays 1 here, so a skipped test is counted under "success".
            cm.log("Skipped")
            test.skipped()

        else:
            did_run = 1
            ret = test(nodechoice)

        if not test.teardown(nodechoice):
            cm.log("Teardown failed")
            if self._ask_continue('Continue? [nY] ') == "n":
                raise ValueError("Teardown of %s on %s failed" %
                                 (test.name, nodechoice))
            ret = 0

        stoptime = time.time()
        cm.oprofileSave(testcount)

        elapsed_time = stoptime - starttime
        test_time = stoptime - test.get_timer()
        if not test["min_time"]:
            # First recorded run: seed all three timing statistics.
            test["elapsed_time"] = elapsed_time
            test["min_time"] = test_time
            test["max_time"] = test_time
        else:
            test["elapsed_time"] = test["elapsed_time"] + elapsed_time
            if test_time < test["min_time"]:
                test["min_time"] = test_time
            if test_time > test["max_time"]:
                test["max_time"] = test_time

        if ret:
            self.incr("success")
            test.log_timer()
        else:
            self.incr("failure")
            cm.statall()
            did_run = 1  # Force the test count to be incremented anyway so test extraction works

        self.audit(test.errorstoignore())
        return did_run

    def summarize(self):
        """Log the overall statistics followed by a per-test summary."""
        cm = self.ClusterManager
        cm.log("****************")
        cm.log("Overall Results:" + repr(self.Stats))
        cm.log("****************")

        # Only these per-test statistics appear in the summary lines.
        stat_filter = {
            "calls": 0,
            "failure": 0,
            "skipped": 0,
            "auditfail": 0,
        }
        cm.log("Test Summary")
        for test in self.Tests:
            for key in stat_filter:
                stat_filter[key] = test.Stats[key]
            cm.log(("Test %s: " % test.name).ljust(25) +
                   " %s" % repr(stat_filter))

        cm.debug("Detailed Results")
        for test in self.Tests:
            cm.debug(("Test %s: " % test.name).ljust(25) +
                     " %s" % repr(test.Stats))

        cm.log("<<<<<<<<<<<<<<<< TESTS COMPLETED")

    def audit(self, LocalIgnore=None):
        """Run every audit and drain the BadNews watcher.

        `LocalIgnore` is an optional list of extra regular expressions; a
        BadNews line matching any ignore pattern is not reported.  Returns the
        number of audits that failed.  Raises ValueError when 1000 BadNews
        lines have been reported and the operator answers "n".
        """
        ignorelist = ["CTS:"]
        if LocalIgnore is not None:
            ignorelist.extend(LocalIgnore)
        ignorelist.extend(self.ClusterManager.errorstoignore())
        ignorelist.extend(self.ClusterManager.instance_errorstoignore())

        # This makes sure everything is stabilized before starting...
        failed = 0
        for audit in self.Audits:
            if not audit():
                self.ClusterManager.log("Audit " + audit.name() + " FAILED.")
                failed += 1
            else:
                self.ClusterManager.debug("Audit " + audit.name() + " passed.")

        errcount = 0
        while errcount < 1000:
            match = self.BadNews.look(0) if self.BadNews else None
            if not match:
                break
            if any(re.search(ignore, match) for ignore in ignorelist):
                continue
            self.ClusterManager.log("BadNews: " + match)
            self.incr("BadNews")
            errcount += 1
        else:
            # Only reached when the loop ran out of budget, not on break.
            if self._ask_continue('Big problems.  Continue? [nY]') == "n":
                self.ClusterManager.log("Shutting down.")
                self.summarize()
                self.TearDown()
                raise ValueError("Looks like we hit a BadNews jackpot!")

        if self.BadNews:
            self.BadNews.end()
        return failed