Example #1
0
 def test_cfg_good(self):
     """Start ldmsd with a well-formed config and check its directory.

     The config adds and starts one active producer (``localhost`` at
     ``self.SMP_PORT``) and one updater matching that producer.  The daemon
     is started with ``port="10000"``; the test connects to that port and
     expects the directory listing to be exactly ``["smp/meminfo"]``.
     """
     cfg = """\
         prdcr_add name=%(prdcr)s xprt=%(xprt)s host=%(host)s port=%(port)s \
                   type=active interval=1000000
         prdcr_start name=%(prdcr)s
         updtr_add name=%(updtr)s interval=1000000 offset=500000
         updtr_prdcr_add name=%(updtr)s regex=%(prdcr)s
         updtr_start name=%(updtr)s
     """ % {
         "prdcr": "prdcr",
         "updtr": "updtr",
         "xprt": "sock",
         "host": "localhost",
         "port": self.SMP_PORT,
     }
     daemon = LDMSD(port="10000",
                    auth=self.AUTH,
                    auth_opt=self.LDMSD_AUTH_OPT,
                    cfg=cfg)
     daemon.run()
     try:
         # Give the daemon a moment to come up and process its config.
         time.sleep(0.5)
         xprt = ldms.LDMS_xprt_new_with_auth(self.XPRT, self.AUTH,
                                             self.LDMSD_AUTH_OPT)
         rc = ldms.LDMS_xprt_connect_by_name(xprt, "localhost", "10000")
         # unittest assertion instead of a bare assert: it is not stripped
         # under ``python -O`` and reports both values on failure.
         self.assertEqual(rc, 0)
         dir_resp = ldms.LDMS_xprt_dir(xprt)
     finally:
         # Always stop the daemon, even when a step above fails, so it does
         # not stay behind after the test.
         daemon.term()
     self.assertEqual(dir_resp, ["smp/meminfo"])
Example #2
0
    def test_01_default_with_job(self):
        """Test default options with jobinfo set

        Loads the ``faux_job`` plugin as instance ``jobinfo`` and the
        ``clock`` plugin as instance ``clk``, each sampled every 2000000
        (interval units as understood by ldmsd), then hands the connection
        to ``__verify`` expecting the sets ``<PRDCR>/jobinfo`` and
        ``<PRDCR>/clk``.
        """
        # The config contains no ``%(...)s`` placeholders, so it is used
        # verbatim (the former ``% {...}`` formatting step was a no-op).
        cfg = """
            load name=jobinfo plugin=faux_job
            config name=jobinfo
            smplr_add name=smp_job instance=jobinfo interval=2000000
            smplr_start name=smp_job

            load name=clk plugin=clock
            config name=clk
            smplr_add name=smp_clk instance=clk interval=2000000
            smplr_start name=smp_clk
        """
        daemon = LDMSD(port=self.SMP_PORT, auth=self.AUTH,
                       auth_opt=self.LDMSD_AUTH_OPT,
                       cfg=cfg)
        daemon.run()
        try:
            # Wait long enough for at least one 2-second sampling interval.
            time.sleep(2.0)
            xprt = ldms.LDMS_xprt_new_with_auth(self.XPRT, self.AUTH,
                                                self.LDMSD_AUTH_OPT)
            rc = ldms.LDMS_xprt_connect_by_name(xprt, "localhost",
                                                self.SMP_PORT)
            self.assertEqual(rc, 0)
            self.__verify(xprt, job=self.PRDCR + "/jobinfo",
                          clk=self.PRDCR + "/clk")
        finally:
            # Stop the daemon even when the connection or verification fails.
            daemon.term()
Example #3
0
 def test_05_bad_pair(self):
     """Check the failover status of a daemon that has not paired.

     A daemon named ``"bad"`` is started on port 9999 with failover
     configured toward ``localhost`` at ``LVX_port(1, 0)`` using peer name
     ``LVX_prdcr(1, 0)``.  After 4 seconds its ``failover_status`` must
     report a connection state that is one of ``DISCONNECTED``,
     ``CONNECTING``, ``PAIRING`` or ``PAIRING_RETRY``, and the
     ``PEERCFG_RECEIVED`` flag must be 0.
     """
     port = LVX_port(1, 0)
     pname = LVX_prdcr(1, 0)
     cfg = """\
     failover_config host=localhost port=%(port)d xprt=%(xprt)s \
                     auto_switch=1 interval=1000000 \
                     peer_name=%(name)s
     failover_start
     """ % {
         "port": port,
         "xprt": XPRT,
         "name": pname,
     }
     # NOTE(review): this daemon is never terminated explicitly in this
     # test; presumably it is cleaned up when the object is destroyed --
     # confirm against the LDMSD helper.
     p = LDMSD(9999, cfg=cfg, name="bad")
     p.run()
     time.sleep(4)
     ctrl = ldmsdInbandConfig(host="localhost", port=9999, xprt=XPRT)
     try:
         resp = ctrl.comm("failover_status")
         # Keep the raw response reachable for interactive debugging.
         DEBUG.resp = resp
     finally:
         # Release the control connection even when the request fails.
         ctrl.close()
     obj = json.loads(resp['msg'])
     self.assertIn(obj['conn_state'], [
         'DISCONNECTED',
         'CONNECTING',
         'PAIRING',
         'PAIRING_RETRY',
     ])
     self.assertEqual(int(obj['flags']['PEERCFG_RECEIVED']), 0)
Example #4
0
 def test_cfg_semi_bad(self):
     """Check that ldmsd keeps running after a config line with a bogus
     attribute (``prdcr_add name=abc bogus=bogus``)."""
     cfg = """\
         prdcr_add name=abc bogus=bogus
     """
     daemon = LDMSD(port="10000",
                    auth=self.AUTH,
                    auth_opt=self.LDMSD_AUTH_OPT,
                    cfg=cfg)
     daemon.run()
     try:
         time.sleep(1)
         # bad config should not terminate the daemon
         self.assertTrue(daemon.is_running())
     finally:
         # Stop the daemon even when the assertion fails, so it does not
         # stay behind after the test.
         daemon.term()
Example #5
0
    def test_lv1_store(self):
        """Run an aggregator with a CSV store and check timestamp spacing.

        The aggregator pulls from the producer at ``self.SMP_PORT`` and
        stores the ``meminfo`` schema through ``store_csv`` into
        ``csv/csv1/meminfo``.  After the aggregator is stopped, the first
        column of every data row is parsed as a float timestamp and each
        pair of consecutive timestamps must differ by ``self.SMP_INT * uS``
        within 0.001.
        """
        # Start from an empty container directory.
        shutil.rmtree("csv/csv1", ignore_errors=True)
        os.makedirs("csv/csv1")
        # ldmsd aggregator conf
        cfg = """\
        prdcr_add name=prdcr xprt=%(xprt)s host=localhost port=%(port)s \
                  interval=1000000 type=active
        prdcr_start name=prdcr

        updtr_add name=updtr interval=%(interval)d offset=50000
        updtr_prdcr_add name=updtr regex=prdcr
        updtr_start name=updtr

        load name=store_csv
        config name=store_csv action=init path=csv buffer=0
        strgp_add name=strgp plugin=store_csv container=csv1 schema=meminfo
        strgp_prdcr_add name=strgp regex=prdcr
        strgp_start name=strgp
        """ % {
            "xprt": self.XPRT,
            "port": self.SMP_PORT,
            "interval": self.AGG_INT,
        }
        agg = LDMSD(port=self.AGG_PORT,
                    cfg=cfg,
                    logfile=self.AGG_LOG,
                    gdb_port=self.AGG_GDB_PORT)
        # Keep the daemon object reachable for interactive debugging.
        DEBUG.agg = agg
        log.info("starting aggregator")
        agg.run()
        log.info("collecting data")
        # 2 seconds plus two aggregator intervals (uS presumably converts
        # microseconds to seconds -- confirm against its definition).
        time.sleep(2 + 2 * self.AGG_INT * uS)
        agg.term()
        # Short pause after termination before reading the stored file.
        time.sleep(0.25)
        log.info("Verifying Data")
        # expecting to see a bunch of data, with dt ~ self.SMP_INT usec
        with open("csv/csv1/meminfo") as f:
            lines = f.readlines()
        lines = lines[1:]  # the [0] is the header
        # Raw string so that ``\d`` and ``\.`` reach the regex engine
        # without relying on unrecognised string escapes.
        rexp = re.compile(r"^(\d+\.\d+),.*$")
        ts = [float(rexp.match(line).group(1)) for line in lines]
        for a, b in zip(ts, ts[1:]):
            dt = b - a
            # allowing 1 millisec error
            self.assertLess(abs(dt - self.SMP_INT * uS), 0.001)
        log.info("%d data timestamps verified" % len(ts))
Example #6
0
    def test_00_default(self):
        """Test default options.

        Loads the ``clock`` plugin as instance ``clk`` with no extra config
        options, samples it every 2000000 (interval units as understood by
        ldmsd), and hands the connection to ``__verify`` expecting the set
        ``<PRDCR>/clk``.
        """
        cfg = """
            load name=clk plugin=clock
            config name=clk

            smplr_add name=smp_clk instance=clk interval=2000000
            smplr_start name=smp_clk
        """
        daemon = LDMSD(port=self.SMP_PORT, auth=self.AUTH,
                       auth_opt=self.LDMSD_AUTH_OPT,
                       cfg=cfg)
        daemon.run()
        try:
            # Wait long enough for at least one 2-second sampling interval.
            time.sleep(2.0)
            xprt = ldms.LDMS_xprt_new_with_auth(self.XPRT, self.AUTH,
                                                self.LDMSD_AUTH_OPT)
            rc = ldms.LDMS_xprt_connect_by_name(xprt, "localhost",
                                                self.SMP_PORT)
            self.assertEqual(rc, 0)
            self.__verify(xprt, clk=self.PRDCR + "/clk")
        finally:
            # Stop the daemon even when the connection or verification fails.
            daemon.term()
Example #7
0
    def test_02_customized_options(self):
        """Test customized options

        The ``clk`` instance is configured with ``uid=2222 gid=3333
        perm=0660`` plus alternative job metric names taken from the
        ``myjob`` set.  The test then connects three times, using uid/gid
        2222, 3333 and 4444, and runs ``__verify`` each time; only the
        4444 connection is expected not to see ``myclk``.
        """
        # The config contains no ``%(...)s`` placeholders, so it is used
        # verbatim (the former ``% {...}`` formatting step was a no-op).
        cfg = """
            load name=job plugin=faux_job_alt
            config name=job instance=myjob
            smplr_add name=smp_job instance=job interval=2000000 offset=0
            smplr_start name=smp_job

            load name=clk plugin=clock
            config name=clk producer=myprdcr component_id=20 instance=myclk \
                   uid=2222 \
                   gid=3333 \
                   perm=0660 \
                   job_set=myjob \
                   job_id=alt_job_id \
                   app_id=alt_app_id \
                   job_start=alt_job_start \
                   job_end=alt_job_end
            smplr_add name=smp_clk instance=clk interval=2000000 offset=0
            smplr_start name=smp_clk
        """
        # (id used as both uid and gid for the connection, expected clk set)
        # Presumably these exercise owner, group and "other" access under
        # perm=0660 -- confirm against the ldmsd permission model.
        cases = (
            ("2222", "myclk"),
            ("3333", "myclk"),
            ("4444", None),  # shouldn't see 'myclk'
        )
        daemon = LDMSD(port=self.SMP_PORT, auth=self.AUTH,
                       auth_opt=self.LDMSD_AUTH_OPT,
                       cfg=cfg)
        log.info("")
        log.info("Starting ldmsd")
        daemon.run()
        try:
            time.sleep(4.0)
            for ident, expected_clk in cases:
                log.info("Verifying with uid/gid %s" % ident)
                xprt = ldms.LDMS_xprt_new_with_auth(
                    self.XPRT, self.AUTH, {"uid": ident, "gid": ident})
                rc = ldms.LDMS_xprt_connect_by_name(xprt, "localhost",
                                                    self.SMP_PORT)
                self.assertEqual(rc, 0)
                self.__verify(xprt, job="myjob", clk=expected_clk,
                              component_id=20,
                              job_id="alt_job_id", app_id="alt_app_id",
                              job_start="alt_job_start",
                              job_end="alt_job_end")
        finally:
            # terminate ldmsd, even when a connection or verification fails
            daemon.term()
Example #8
0
class TestLDMSDLongConfig(unittest.TestCase):
    """Test cases focusing on long configuration line"""
    XPRT = "sock"
    SMP_PORT = "10001"
    SMP_LOG = "smp.log"
    LEN = 65536

    # LDMSD instances
    smp = None

    @classmethod
    def setUpClass(cls):
        pass

    @classmethod
    def tearDownClass(cls):
        pass

    def setUp(self):
        log.debug("---- %s ----" % self._testMethodName)

    def tearDown(self):
        log.debug("----------------------------")
        if self.smp:
            del self.smp

    def getMaxMsgSz(self, mode=["configFile", "ldmsctl", "ldmsd_controller"]):
        if mode == "configFile":
            return 65536  # ldmsd config file max rec len is 8192.
        else:
            return 1100000  # ldms_xprt_msg_max() for socket is 1048536. This could be varied by system to system.

    def getGreetingCfgCmd(self, _len):
        return """greeting name=%s""" % ("a" * _len)

    def is_logfile_ready(self, filename):
        """Check if the ldmsd log file is created.
        """
        if os.path.isfile(filename):
            return True
        else:
            return False

    def is_msg_in_logfile(self, filename, msg):
        """Find a line in the ldmsd log file that contains the given msg
        @param msg      message to search for in the log file
        """
        with open(filename, 'r') as loghandle:
            line = loghandle.readline()
            while line:
                if msg in line:
                    return True
                line = loghandle.readline()
            return False

    def getLogMsg(self, len):
        return "strlen(name)=%s" % len

    def test_00_config_file(self):
        name_len = self.getMaxMsgSz("configFile")
        logfile = "config_file.log"
        smp_cfg = self.getGreetingCfgCmd(name_len)
        self.smp = LDMSD(port=self.SMP_PORT,
                         xprt=self.XPRT,
                         cfg=smp_cfg,
                         logfile=logfile,
                         verbose="DEBUG")
        log.info("starting sampler")
        self.smp.run()
        if not on_timeout(self.is_logfile_ready, filename=logfile):
            raise Exception(
                "ldmsd log file isn't created within 1 second after ldmsd is started."
            )
        msg = self.getLogMsg(name_len)
        self.assertTrue(
            on_timeout(self.is_msg_in_logfile, filename=logfile, msg=msg))

    def test_01_ldmsd_controller(self):
        logfile = "ldmsd_controller.log"
        self.smp = LDMSD(port=self.SMP_PORT,
                         xprt=self.XPRT,
                         verbose="DEBUG",
                         logfile=logfile)
        self.smp.run()
        if not on_timeout(self.is_logfile_ready, filename=logfile):
            raise Exception(
                "ldmsd log file isn't created within 1 second after ldmsd is started."
            )
        name_len = self.getMaxMsgSz("ldmsd_controller")
        line = self.getGreetingCfgCmd(name_len)
        cfg = tempfile.NamedTemporaryFile()
        cfg.write(line)
        cfg.file.flush()
        # ldmsd_controller subprocess
        ctrl = LDMSD_Controller(port=self.SMP_PORT,
                                xprt=self.XPRT,
                                source=cfg.name)
        ctrl.run()
        msg = self.getLogMsg(name_len)
        self.assertTrue(
            on_timeout(self.is_msg_in_logfile, filename=logfile, msg=msg))

    def test_02_ldmsctl(self):
        logfile = "ldmsctl.log"
        self.smp = LDMSD(port=self.SMP_PORT,
                         xprt=self.XPRT,
                         verbose="DEBUG",
                         logfile=logfile)
        self.smp.run()
        if not on_timeout(self.is_logfile_ready, filename=logfile):
            raise Exception(
                "ldmsd log file isn't created within 1 second after ldmsd is started."
            )
        name_len = self.getMaxMsgSz("ldmsctl")
        line = self.getGreetingCfgCmd(name_len)
        cfg = tempfile.NamedTemporaryFile()
        cfg.write(line)
        cfg.file.flush()
        # ldmsd_controller subprocess
        ctrl = LDMSD_Controller(port=self.SMP_PORT,
                                xprt=self.XPRT,
                                source=cfg.name,
                                ldmsctl=True)
        ctrl.run()
        msg = self.getLogMsg(name_len)
        self.assertTrue(
            on_timeout(self.is_msg_in_logfile, filename=logfile, msg=msg))
Example #9
0
    def setUpClass(cls):
        """Start the sampler daemons and the aggregator, then configure it.

        One sampler ldmsd is started per entry of ``cls.PRDCRS`` and
        appended to ``cls.prdcrs``; a single aggregator is stored in
        ``cls.agg``.  Each ``cls.PRDCRS`` entry is a mapping from which the
        keys ``prdcr``, ``updtr``, ``port``, ``logfile``, ``xprt``, ``auth``
        and ``auth_opt`` are read here.  The aggregator is configured
        in-band, once per entry and with that entry's credentials, and its
        directory is finally compared against ``<prdcr>/meminfo`` for every
        entry.

        Raises:
            RuntimeError: when a config command returns a non-zero error
                code, when the verification connection fails, or when the
                directory listing differs from the expected sets.
        """
        # Need 3 ldmsd .. the config objects are for aggregators
        log.info("Setting up TestLDMSDPerm")
        try:
            # samplers (producers)
            for spec in cls.PRDCRS:
                smp_cfg = """
                    load name=meminfo
                    config name=meminfo producer=%(prdcr)s \
                           instance=%(prdcr)s/meminfo schema=meminfo
                    start name=meminfo interval=1000000 offset=0
                """ % spec
                log.debug("smp_cfg: %s" % smp_cfg)
                ldmsd = LDMSD(port=spec["port"],
                              xprt=cls.XPRT,
                              auth=cls.AUTH,
                              auth_opt=cls.LDMSD_AUTH_OPT,
                              cfg=smp_cfg,
                              logfile=spec["logfile"])
                log.info("starting %s" % spec["prdcr"])
                ldmsd.run()
                cls.prdcrs.append(ldmsd)

            # aggregator
            cls.agg = LDMSD(port=cls.AGG_PORT,
                            xprt=cls.XPRT,
                            auth=cls.AUTH,
                            auth_opt=cls.LDMSD_AUTH_OPT,
                            logfile=cls.AGG_LOG)
            log.info("starting aggregator")
            cls.agg.run()
            time.sleep(1)

            # need to config separately so that prdcr,updtr pairs are owned by
            # different users.
            log.info("configuring aggregator")
            for spec in cls.PRDCRS:
                log.info("....adding %(prdcr)s" % spec)
                agg_cfg = """\
                prdcr_add name=%(prdcr)s xprt=%(xprt)s host=localhost \
                          port=%(port)s type=active interval=1000000 \
                          perm=0600
                prdcr_start name=%(prdcr)s
                updtr_add name=%(updtr)s interval=1000000 offset=500000 \
                          perm=0600
                updtr_prdcr_add name=%(updtr)s regex=%(prdcr)s
                updtr_start name=%(updtr)s
                """ % spec
                log.debug("agg_cfg: %s" % agg_cfg)
                ctrl = ldmsdInbandConfig(host="localhost",
                                         port=cls.AGG_PORT,
                                         xprt=spec["xprt"],
                                         auth=spec["auth"],
                                         auth_opt=spec["auth_opt"])
                try:
                    # The backslash continuations above already joined the
                    # multi-line commands, so each non-empty line is one
                    # complete command.
                    for cmd in agg_cfg.splitlines():
                        cmd = cmd.strip()
                        if not cmd:
                            continue
                        log.debug("cmd: %s" % cmd)
                        req = LDMSD_Request.from_str(cmd)
                        req.send(ctrl)
                        resp = req.receive(ctrl)
                        errcode = resp["errcode"]
                        if errcode:
                            raise RuntimeError(
                                "LDMSD Ctrl errcode: %d" % errcode)
                finally:
                    # Release the control connection even when a command
                    # fails.
                    ctrl.close()
            time.sleep(1)
            # Verify that the agg is working as configured
            log.info("verifying aggregator")
            xprt = ldms.LDMS_xprt_new_with_auth(cls.XPRT, cls.AUTH,
                                                cls.LDMSD_AUTH_OPT)
            rc = ldms.LDMS_xprt_connect_by_name(xprt, "localhost",
                                                cls.AGG_PORT)
            if rc:
                raise RuntimeError("LDMS connect failed: %d" % rc)
            _dir = ldms.LDMS_xprt_dir(xprt)
            log.debug("dirs: %s" % str(_dir))
            ldms.ldms_xprt_close(xprt)
            _edirs = [p["prdcr"] + "/meminfo" for p in cls.PRDCRS]
            if set(_dir) != set(_edirs):
                raise RuntimeError("Bad set ...")
        except:
            # Deliberately catches everything: the handler only drops the
            # daemon references and then re-raises.
            # NOTE(review): ``del cls.agg`` assumes the attribute exists on
            # this class even when the failure happened before the
            # aggregator was created -- confirm against the class body.
            del cls.agg
            del cls.prdcrs
            raise
        log.info("TestLDMSDPerm set up done")
Example #10
0
class SlurmTest(unittest.TestCase):
    """A test case for slurm sampler"""
    XPRT = "sock"
    SAMP_PORT = "10001"
    SAMP_LOG = DIR + "/samp.log" # set "<PATH>" to enable sampler logging
    SAMP_GDB_PORT = None # set to "20001" and remote-attach for debugging

    JOB_SIZE = 4
    TASK_SIZE = 8

    @classmethod
    def setUpClass(self):
        self.expt = None
        self.samp = None
        self.slurm_set = None # the `slurm` set
        self.mem_set = None # the `mem` (meminfo) set
        self.conn = None # ldms connection
        ldms.ldms_init(512*1024*1024) # 512MB should suffice
        try:
            cfg = """\
            load name=slurm plugin=slurm_sampler
            config name=slurm instance=slurm stream=slurm \
                              job_count=%(JOB_SIZE)d task_count=%(TASK_SIZE)d

            load name=mem plugin=meminfo
            config name=mem job_set=slurm instance=mem

            smplr_add name=mem_smplr instance=mem interval=1000000 offset=0
            smplr_start name=mem_smplr
            """ % { k: getattr(self, k) for k in dir(self) }

            self.samp = LDMSD(port=self.SAMP_PORT, cfg = cfg,
                             logfile = self.SAMP_LOG,
                             gdb_port = self.SAMP_GDB_PORT)
            D.samp = self.samp
            log.info("Starting sampler")
            self.samp.run()
            self.conn = ldms.LDMS_xprt_new(self.XPRT)
            D.conn = self.conn
            self.conn.connectByName("localhost", self.SAMP_PORT)
            self.slurm_set = self.conn.lookupSet("slurm", 0)
            D.slurm_set = self.slurm_set
            self.mem_set = self.conn.lookupSet("mem", 0)
            D.mem_set = self.mem_set
            expt = {
                    "component_id" : [0L] * self.JOB_SIZE,
                    "job_id"       : [0L] * self.JOB_SIZE,
                    "app_id"       : [0L] * self.JOB_SIZE,
                    "current_slot" :  0L,
                    "job_state"    : [0L] * self.JOB_SIZE,
                    "job_tstamp"   : [0L] * self.JOB_SIZE,
                    "job_size"     : [0L] * self.JOB_SIZE,
                    "job_uid"      : [0L] * self.JOB_SIZE,
                    "job_gid"      : [0L] * self.JOB_SIZE,
                    "job_start"    : [0L] * self.JOB_SIZE,
                    "job_end"      : [0L] * self.JOB_SIZE,
                    "node_count"   : [0L] * self.JOB_SIZE,
                    "task_count"   : [0L] * self.JOB_SIZE,
                }
            task_keys = ["task_pid", "task_rank", "task_exit_status"]
            expt.update({ "%s_%d" % (k, i) : [0L] * self.TASK_SIZE \
                                    for k in task_keys \
                                    for i in range(0, self.JOB_SIZE)
                        })
            self.expt = expt
            log.info("--- Done setting up SlurmTest ---")