def test_cfg_good(self):
    """Start a daemon with a valid aggregator config and check its directory.

    The daemon listens on port 10000 and is configured with one producer
    (localhost at ``self.SMP_PORT``) and one updater.  After sleeping 0.5 s
    the test connects to the daemon and expects the directory listing to be
    exactly ``["smp/meminfo"]``.
    """
    cfg = """\
        prdcr_add name=%(prdcr)s xprt=%(xprt)s host=%(host)s port=%(port)s \
                  type=active interval=1000000
        prdcr_start name=%(prdcr)s
        updtr_add name=%(updtr)s interval=1000000 offset=500000
        updtr_prdcr_add name=%(updtr)s regex=%(prdcr)s
        updtr_start name=%(updtr)s
    """ % {
        "prdcr": "prdcr",
        "updtr": "updtr",
        "xprt": "sock",
        "host": "localhost",
        "port": self.SMP_PORT,
    }
    daemon = LDMSD(port="10000", auth=self.AUTH,
                   auth_opt=self.LDMSD_AUTH_OPT, cfg=cfg)
    daemon.run()
    try:
        time.sleep(0.5)
        xprt = ldms.LDMS_xprt_new_with_auth(self.XPRT, self.AUTH,
                                            self.LDMSD_AUTH_OPT)
        rc = ldms.LDMS_xprt_connect_by_name(xprt, "localhost", "10000")
        # assertEqual (unlike a bare assert) survives ``python -O``.
        self.assertEqual(rc, 0)
        dir_resp = ldms.LDMS_xprt_dir(xprt)
    finally:
        # Always stop the daemon, even when the connection or dir fails,
        # so a failure here does not leave a process holding the port.
        daemon.term()
    self.assertEqual(dir_resp, ["smp/meminfo"])
def test_01_default_with_job(self):
    """Test default options with jobinfo set"""
    # The template has no %-conversion specifiers, so it is used verbatim
    # (the former ``% {...}`` mapping had no effect on the text).
    cfg = """
        load name=jobinfo plugin=faux_job
        config name=jobinfo
        smplr_add name=smp_job instance=jobinfo interval=2000000
        smplr_start name=smp_job
        load name=clk plugin=clock
        config name=clk
        smplr_add name=smp_clk instance=clk interval=2000000
        smplr_start name=smp_clk
    """
    daemon = LDMSD(port=self.SMP_PORT, auth=self.AUTH,
                   auth_opt=self.LDMSD_AUTH_OPT, cfg=cfg)
    daemon.run()
    try:
        time.sleep(2.0)
        xprt = ldms.LDMS_xprt_new_with_auth(self.XPRT, self.AUTH,
                                            self.LDMSD_AUTH_OPT)
        rc = ldms.LDMS_xprt_connect_by_name(xprt, "localhost", self.SMP_PORT)
        # assertEqual (unlike a bare assert) survives ``python -O``.
        self.assertEqual(rc, 0)
        self.__verify(xprt, job=self.PRDCR + "/jobinfo",
                      clk=self.PRDCR + "/clk")
    finally:
        # Stop the daemon even when connecting or verification fails.
        daemon.term()
def test_05_bad_pair(self):
    """Check failover_status of a daemon configured with a failover peer.

    A daemon named "bad" is started on port 9999 with a failover config
    whose peer is ``LVX_prdcr(1, 0)`` at ``LVX_port(1, 0)``.  After 4 s the
    reported ``conn_state`` must be one of the not-yet-connected states and
    the ``PEERCFG_RECEIVED`` flag must be 0.
    """
    peer_port = LVX_port(1, 0)
    peer_name = LVX_prdcr(1, 0)
    template = """\
        failover_config host=localhost port=%(port)d xprt=%(xprt)s \
                        auto_switch=1 interval=1000000 \
                        peer_name=%(name)s
        failover_start
    """
    cfg = template % {
        "port": peer_port,
        "xprt": XPRT,
        "name": peer_name,
    }
    bad = LDMSD(9999, cfg=cfg, name="bad")
    bad.run()
    time.sleep(4)

    # Query the failover status over an in-band config connection.
    ctrl = ldmsdInbandConfig(host="localhost", port=9999, xprt=XPRT)
    resp = ctrl.comm("failover_status")
    DEBUG.resp = resp  # kept on DEBUG for post-mortem inspection
    ctrl.close()

    status = json.loads(resp['msg'])
    unpaired_states = [
        'DISCONNECTED',
        'CONNECTING',
        'PAIRING',
        'PAIRING_RETRY',
    ]
    self.assertIn(status['conn_state'], unpaired_states)
    received = int(status['flags']['PEERCFG_RECEIVED'])
    self.assertEqual(received, 0)
def test_cfg_semi_bad(self):
    """Check that a config line with a bogus attribute leaves the daemon up.

    The daemon is started on port 10000 with ``prdcr_add name=abc
    bogus=bogus`` and must still report ``is_running()`` one second later.
    """
    cfg = """\
        prdcr_add name=abc bogus=bogus
    """
    daemon = LDMSD(port="10000", auth=self.AUTH,
                   auth_opt=self.LDMSD_AUTH_OPT, cfg=cfg)
    daemon.run()
    try:
        time.sleep(1)
        # bad config should not terminate the daemon
        self.assertTrue(daemon.is_running())
    finally:
        # Terminate even when the assertion above fails, so the daemon
        # is not left running.
        daemon.term()
def test_lv1_store(self):
    """Run an aggregator with store_csv and check stored timestamp spacing.

    The aggregator pulls from the producer at ``self.SMP_PORT`` and stores
    the ``meminfo`` schema under ``csv/csv1``.  After it is terminated, the
    first CSV column of consecutive rows must differ by ``self.SMP_INT``
    microseconds, within one millisecond.
    """
    # ldmsd aggregator conf
    shutil.rmtree("csv/csv1", ignore_errors=True)
    os.makedirs("csv/csv1")
    cfg = """\
        prdcr_add name=prdcr xprt=%(xprt)s host=localhost port=%(port)s \
                  interval=1000000 type=active
        prdcr_start name=prdcr
        updtr_add name=updtr interval=%(interval)d offset=50000
        updtr_prdcr_add name=updtr regex=prdcr
        updtr_start name=updtr
        load name=store_csv
        config name=store_csv action=init path=csv buffer=0
        strgp_add name=strgp plugin=store_csv container=csv1 schema=meminfo
        strgp_prdcr_add name=strgp regex=prdcr
        strgp_start name=strgp
    """ % {
        "xprt": self.XPRT,
        "port": self.SMP_PORT,
        "interval": self.AGG_INT,
    }
    agg = LDMSD(port=self.AGG_PORT, cfg=cfg, logfile=self.AGG_LOG,
                gdb_port=self.AGG_GDB_PORT)
    DEBUG.agg = agg
    log.info("starting aggregator")
    agg.run()
    log.info("collecting data")
    time.sleep(2 + 2 * self.AGG_INT * uS)
    agg.term()
    time.sleep(0.25)
    log.info("Verifying Data")
    # expecting to see a bunch of data, with dt ~ self.SMP_INT usec
    with open("csv/csv1/meminfo") as f:
        # Closed deterministically instead of relying on garbage collection.
        lines = f.readlines()[1:]  # the [0] is the header
    # Raw string: "\d" and "\." are regex escapes, not string escapes.
    rexp = re.compile(r"^(\d+\.\d+),.*$")
    ts = [float(rexp.match(l).group(1)) for l in lines]
    for a, b in zip(ts, ts[1:]):
        dt = b - a
        # allowing 1 millisec error
        self.assertLess(abs(dt - self.SMP_INT * uS), 0.001)
    log.info("%d data timestamps verified" % len(ts))
def test_00_default(self):
    """Test default options."""
    cfg = """
        load name=clk plugin=clock
        config name=clk
        smplr_add name=smp_clk instance=clk interval=2000000
        smplr_start name=smp_clk
    """
    daemon = LDMSD(port=self.SMP_PORT, auth=self.AUTH,
                   auth_opt=self.LDMSD_AUTH_OPT, cfg=cfg)
    daemon.run()
    try:
        time.sleep(2.0)
        xprt = ldms.LDMS_xprt_new_with_auth(self.XPRT, self.AUTH,
                                            self.LDMSD_AUTH_OPT)
        rc = ldms.LDMS_xprt_connect_by_name(xprt, "localhost", self.SMP_PORT)
        # assertEqual (unlike a bare assert) survives ``python -O``.
        self.assertEqual(rc, 0)
        self.__verify(xprt, clk=self.PRDCR + "/clk")
    finally:
        # Stop the daemon even when connecting or verification fails.
        daemon.term()
def test_02_customized_options(self):
    """Test customized options"""
    # The template has no %-conversion specifiers, so it is used verbatim
    # (the former ``% {...}`` mapping had no effect on the text).
    cfg = """
        load name=job plugin=faux_job_alt
        config name=job instance=myjob
        smplr_add name=smp_job instance=job interval=2000000 offset=0
        smplr_start name=smp_job
        load name=clk plugin=clock
        config name=clk producer=myprdcr component_id=20 instance=myclk \
               uid=2222 \
               gid=3333 \
               perm=0660 \
               job_set=myjob \
               job_id=alt_job_id \
               app_id=alt_app_id \
               job_start=alt_job_start \
               job_end=alt_job_end
        smplr_add name=smp_clk instance=clk interval=2000000 offset=0
        smplr_start name=smp_clk
    """
    daemon = LDMSD(port=self.SMP_PORT, auth=self.AUTH,
                   auth_opt=self.LDMSD_AUTH_OPT, cfg=cfg)
    log.info("")
    log.info("Starting ldmsd")
    daemon.run()

    # (uid/gid used to connect, clk set expected to be seen).  The clk set
    # is configured with uid=2222 gid=3333 perm=0660, so a client
    # connecting as 4444 shouldn't see 'myclk'.
    cases = [
        ("2222", "myclk"),
        ("3333", "myclk"),
        ("4444", None),
    ]
    try:
        time.sleep(4.0)
        for ident, expected_clk in cases:
            log.info("Verifying with uid/gid %s" % ident)
            xprt = ldms.LDMS_xprt_new_with_auth(
                self.XPRT, self.AUTH, {"uid": ident, "gid": ident})
            rc = ldms.LDMS_xprt_connect_by_name(xprt, "localhost",
                                                self.SMP_PORT)
            # assertEqual (unlike a bare assert) survives ``python -O``.
            self.assertEqual(rc, 0)
            self.__verify(xprt, job="myjob", clk=expected_clk,
                          component_id=20,
                          job_id="alt_job_id",
                          app_id="alt_app_id",
                          job_start="alt_job_start",
                          job_end="alt_job_end")
    finally:
        # terminate ldmsd, even when a connection or verification fails
        daemon.term()
class TestLDMSDLongConfig(unittest.TestCase):
    """Test cases focusing on long configuration line"""

    XPRT = "sock"
    SMP_PORT = "10001"
    SMP_LOG = "smp.log"
    LEN = 65536

    # LDMSD instances
    smp = None

    @classmethod
    def setUpClass(cls):
        pass

    @classmethod
    def tearDownClass(cls):
        pass

    def setUp(self):
        log.debug("---- %s ----" % self._testMethodName)

    def tearDown(self):
        log.debug("----------------------------")
        if self.smp:
            # Drop the instance attribute set by the test; the class-level
            # ``smp = None`` becomes visible again.
            del self.smp

    def getMaxMsgSz(self, mode=("configFile", "ldmsctl", "ldmsd_controller")):
        """Return the greeting-name length to use for a configuration mode.

        @param mode "configFile", "ldmsctl" or "ldmsd_controller".  The
                    default (a tuple naming the accepted values) is not equal
                    to "configFile" and therefore selects the larger size.
        @return 65536 for "configFile", 1100000 for anything else
        """
        if mode == "configFile":
            # NOTE(review): an earlier comment here said the ldmsd config
            # file max record length is 8192, which does not match the
            # value returned -- confirm which one is intended.
            return 65536
        # ldms_xprt_msg_max() for socket is 1048536.  This could be varied
        # by system to system.
        return 1100000

    def getGreetingCfgCmd(self, _len):
        """Return a ``greeting`` command whose name is ``_len`` 'a' chars."""
        return """greeting name=%s""" % ("a" * _len)

    def is_logfile_ready(self, filename):
        """Check if the ldmsd log file is created.
        """
        return os.path.isfile(filename)

    def is_msg_in_logfile(self, filename, msg):
        """Find a line in the ldmsd log file that contains the given msg

        @param filename path of the log file to scan
        @param msg message to search for in the log file
        @return True if some line contains msg, False otherwise
        """
        with open(filename, 'r') as loghandle:
            return any(msg in line for line in loghandle)

    def getLogMsg(self, len):
        """Return the log fragment the tests search for: strlen(name)=<len>."""
        return "strlen(name)=%s" % len

    def _require_logfile(self, logfile):
        """Raise if ``on_timeout`` does not see the ldmsd log file appear."""
        if not on_timeout(self.is_logfile_ready, filename=logfile):
            raise Exception(
                "ldmsd log file isn't created within 1 second after ldmsd is started."
            )

    def _check_greeting_via_controller(self, logfile, mode, **ctrl_opts):
        """Send a long greeting through LDMSD_Controller and check the log.

        @param logfile   log file of the daemon started for this test
        @param mode      mode passed to getMaxMsgSz() to size the greeting
        @param ctrl_opts extra keyword arguments for LDMSD_Controller
        """
        self.smp = LDMSD(port=self.SMP_PORT, xprt=self.XPRT,
                         verbose="DEBUG", logfile=logfile)
        self.smp.run()
        self._require_logfile(logfile)

        name_len = self.getMaxMsgSz(mode)
        line = self.getGreetingCfgCmd(name_len)

        # Text mode so that writing a str works on Python 2 and 3.  The
        # ``with`` block keeps the file alive until the log check is done
        # and then closes (and removes) it deterministically.
        with tempfile.NamedTemporaryFile(mode="w") as cfg:
            cfg.write(line)
            cfg.flush()

            # controller subprocess reading the command from the temp file
            ctrl = LDMSD_Controller(port=self.SMP_PORT, xprt=self.XPRT,
                                    source=cfg.name, **ctrl_opts)
            ctrl.run()

            msg = self.getLogMsg(name_len)
            self.assertTrue(
                on_timeout(self.is_msg_in_logfile, filename=logfile, msg=msg))

    def test_00_config_file(self):
        """Pass a long greeting line to ldmsd through its config (cfg=)."""
        name_len = self.getMaxMsgSz("configFile")
        logfile = "config_file.log"
        smp_cfg = self.getGreetingCfgCmd(name_len)
        self.smp = LDMSD(port=self.SMP_PORT, xprt=self.XPRT,
                         cfg=smp_cfg, logfile=logfile, verbose="DEBUG")
        log.info("starting sampler")
        self.smp.run()
        self._require_logfile(logfile)
        msg = self.getLogMsg(name_len)
        self.assertTrue(
            on_timeout(self.is_msg_in_logfile, filename=logfile, msg=msg))

    def test_01_ldmsd_controller(self):
        """Pass a long greeting line to ldmsd through LDMSD_Controller."""
        self._check_greeting_via_controller("ldmsd_controller.log",
                                            "ldmsd_controller")

    def test_02_ldmsctl(self):
        """Pass a long greeting line through LDMSD_Controller(ldmsctl=True)."""
        self._check_greeting_via_controller("ldmsctl.log", "ldmsctl",
                                            ldmsctl=True)
def setUpClass(cls):
    """Start the sampler daemons and the aggregator, then configure it.

    One sampler ldmsd is started per entry of ``cls.PRDCRS``.  The
    aggregator is started without a config and then configured in-band,
    once per producer entry and with that entry's credentials, so that each
    prdcr/updtr pair is owned by a different user.  Finally the aggregator's
    directory is compared with the expected ``<prdcr>/meminfo`` sets.

    On any failure ``cls.agg`` and ``cls.prdcrs`` are deleted and the
    exception is re-raised.
    """
    # Need 3 ldmsd .. the config objects are for aggregators
    log.info("Setting up TestLDMSDPerm")
    try:
        # samplers (producers)
        for entry in cls.PRDCRS:
            smp_cfg = """
                load name=meminfo
                config name=meminfo producer=%(prdcr)s \
                       instance=%(prdcr)s/meminfo schema=meminfo
                start name=meminfo interval=1000000 offset=0
            """ % entry
            log.debug("smp_cfg: %s" % smp_cfg)
            sampler = LDMSD(port=entry["port"], xprt=cls.XPRT,
                            auth=cls.AUTH, auth_opt=cls.LDMSD_AUTH_OPT,
                            cfg=smp_cfg,
                            logfile=entry["logfile"])
            log.info("starting %s" % entry["prdcr"])
            sampler.run()
            cls.prdcrs.append(sampler)

        # aggregator
        cls.agg = LDMSD(port=cls.AGG_PORT, xprt=cls.XPRT,
                        auth=cls.AUTH, auth_opt=cls.LDMSD_AUTH_OPT,
                        logfile=cls.AGG_LOG)
        log.info("starting aggregator")
        cls.agg.run()
        time.sleep(1)

        # need to config separately so that prdcr,updtr pairs are owned by
        # different users.
        log.info("configuring aggregator")
        for entry in cls.PRDCRS:
            log.info("....adding %(prdcr)s" % entry)
            agg_cfg = """\
                prdcr_add name=%(prdcr)s xprt=%(xprt)s host=localhost \
                          port=%(port)s type=active interval=1000000 \
                          perm=0600
                prdcr_start name=%(prdcr)s
                updtr_add name=%(updtr)s interval=1000000 offset=500000 \
                          perm=0600
                updtr_prdcr_add name=%(updtr)s regex=%(prdcr)s
                updtr_start name=%(updtr)s
            """ % entry
            log.debug("agg_cfg: %s" % agg_cfg)
            ctrl = ldmsdInbandConfig(host="localhost", port=cls.AGG_PORT,
                                     xprt=entry["xprt"],
                                     auth=entry["auth"],
                                     auth_opt=entry["auth_opt"])
            # Send the commands one at a time, skipping blank lines.
            stripped = [raw.strip() for raw in agg_cfg.splitlines()]
            for cmd in stripped:
                if cmd:
                    log.debug("cmd: %s" % cmd)
                    req = LDMSD_Request.from_str(cmd)
                    req.send(ctrl)
                    reply = req.receive(ctrl)
                    errcode = reply["errcode"]
                    if errcode:
                        raise RuntimeError("LDMSD Ctrl errcode: %d" % errcode)
            ctrl.close()
        time.sleep(1)

        # Verify that the agg is working as configured
        log.info("verifying aggregator")
        xprt = ldms.LDMS_xprt_new_with_auth(cls.XPRT, cls.AUTH,
                                            cls.LDMSD_AUTH_OPT)
        rc = ldms.LDMS_xprt_connect_by_name(xprt, "localhost", cls.AGG_PORT)
        if rc:
            raise RuntimeError("LDMS connect failed: %d" % rc)
        found = ldms.LDMS_xprt_dir(xprt)
        log.debug("dirs: %s" % str(found))
        ldms.ldms_xprt_close(xprt)
        expected = [p["prdcr"] + "/meminfo" for p in cls.PRDCRS]
        if set(found) != set(expected):
            raise RuntimeError("Bad set ...")
    except:
        # Drop the daemons started so far, then propagate the failure.
        del cls.agg
        del cls.prdcrs
        raise
    log.info("TestLDMSDPerm set up done")
class SlurmTest(unittest.TestCase): """A test case for slurm sampler""" XPRT = "sock" SAMP_PORT = "10001" SAMP_LOG = DIR + "/samp.log" # set "<PATH>" to enable sampler logging SAMP_GDB_PORT = None # set to "20001" and remote-attach for debugging JOB_SIZE = 4 TASK_SIZE = 8 @classmethod def setUpClass(self): self.expt = None self.samp = None self.slurm_set = None # the `slurm` set self.mem_set = None # the `mem` (meminfo) set self.conn = None # ldms connection ldms.ldms_init(512*1024*1024) # 512MB should suffice try: cfg = """\ load name=slurm plugin=slurm_sampler config name=slurm instance=slurm stream=slurm \ job_count=%(JOB_SIZE)d task_count=%(TASK_SIZE)d load name=mem plugin=meminfo config name=mem job_set=slurm instance=mem smplr_add name=mem_smplr instance=mem interval=1000000 offset=0 smplr_start name=mem_smplr """ % { k: getattr(self, k) for k in dir(self) } self.samp = LDMSD(port=self.SAMP_PORT, cfg = cfg, logfile = self.SAMP_LOG, gdb_port = self.SAMP_GDB_PORT) D.samp = self.samp log.info("Starting sampler") self.samp.run() self.conn = ldms.LDMS_xprt_new(self.XPRT) D.conn = self.conn self.conn.connectByName("localhost", self.SAMP_PORT) self.slurm_set = self.conn.lookupSet("slurm", 0) D.slurm_set = self.slurm_set self.mem_set = self.conn.lookupSet("mem", 0) D.mem_set = self.mem_set expt = { "component_id" : [0L] * self.JOB_SIZE, "job_id" : [0L] * self.JOB_SIZE, "app_id" : [0L] * self.JOB_SIZE, "current_slot" : 0L, "job_state" : [0L] * self.JOB_SIZE, "job_tstamp" : [0L] * self.JOB_SIZE, "job_size" : [0L] * self.JOB_SIZE, "job_uid" : [0L] * self.JOB_SIZE, "job_gid" : [0L] * self.JOB_SIZE, "job_start" : [0L] * self.JOB_SIZE, "job_end" : [0L] * self.JOB_SIZE, "node_count" : [0L] * self.JOB_SIZE, "task_count" : [0L] * self.JOB_SIZE, } task_keys = ["task_pid", "task_rank", "task_exit_status"] expt.update({ "%s_%d" % (k, i) : [0L] * self.TASK_SIZE \ for k in task_keys \ for i in range(0, self.JOB_SIZE) }) self.expt = expt log.info("--- Done setting up SlurmTest ---")