def test_2ndLevel_agg_instance_existed(self, logger, cfg, obj, agg_2ndLevel_conn):
    """Check that the test instance is listed by the 2nd-level aggregator.

    The directory of ``agg_2ndLevel_conn`` is polled once per second, for at
    most two update intervals (``cfg.LDMSD_UPDATE_INTERVAL``), until
    ``obj['instance']`` shows up. The number of seconds waited is logged.
    """
    timeout = 2 * microsec2sec(cfg.LDMSD_UPDATE_INTERVAL)
    passed_sec = 0
    inst_dir = ldms.LDMS_xprt_dir(agg_2ndLevel_conn)
    assert inst_dir is not None
    # Poll on the condition that is asserted below. Waiting only for a
    # non-empty directory would stop too early when another instance is
    # listed before the one under test.
    while obj['instance'] not in inst_dir and passed_sec < timeout:
        sleep(1)
        passed_sec += 1
        inst_dir = ldms.LDMS_xprt_dir(agg_2ndLevel_conn)
        assert inst_dir is not None
    logger.info("Wait time: {0}".format(passed_sec))
    assert obj['instance'] in inst_dir
def test_1stLevel_agg_after_revived(self, logger, cfg, obj, agg_1stLevel_conn):
    """Check that the test instance is listed by the 1st-level aggregator.

    The directory of ``agg_1stLevel_conn`` is polled once per second until
    ``obj['instance']`` is listed or the timeout expires. The number of
    seconds waited is logged.
    """
    # lookup + update. 2 times for safety
    timeout = 2 * microsec2sec(cfg.LDMSD_UPDATE_INTERVAL)
    passed_sec = 0
    inst_dir = ldms.LDMS_xprt_dir(agg_1stLevel_conn)
    assert inst_dir is not None
    # Poll on the asserted condition itself; a non-empty directory does not
    # guarantee that the instance under test is already present.
    while obj['instance'] not in inst_dir and passed_sec < timeout:
        sleep(1)
        passed_sec += 1
        inst_dir = ldms.LDMS_xprt_dir(agg_1stLevel_conn)
        assert inst_dir is not None
    logger.info("Wait time: {0}".format(passed_sec))
    assert obj['instance'] in inst_dir
def test_agg_1stLevel_instance_after_samplerd_die(self, logger, cfg, obj, agg_1stLevel_conn):
    """Check that the test instance disappears from the 1st-level aggregator.

    The directory of ``agg_1stLevel_conn`` is polled once per second, for at
    most one update interval (``cfg.LDMSD_UPDATE_INTERVAL``), until
    ``obj['instance']`` is no longer listed. The seconds waited are logged.
    """
    timeout = microsec2sec(cfg.LDMSD_UPDATE_INTERVAL)
    passed_sec = 0
    inst_dir = ldms.LDMS_xprt_dir(agg_1stLevel_conn)
    assert inst_dir is not None
    # Wait for the instance under test to go away rather than for the whole
    # directory to become empty; unrelated sets may legitimately remain.
    while obj['instance'] in inst_dir and passed_sec < timeout:
        sleep(1)
        passed_sec += 1
        inst_dir = ldms.LDMS_xprt_dir(agg_1stLevel_conn)
        assert inst_dir is not None
    logger.info("Wait time: {0}".format(passed_sec))
    assert obj['instance'] not in inst_dir
def test_2ndLevel_instance_after_1stLevel_revived(self, logger, cfg, obj, agg_2ndLevel_conn):
    """Check that the test instance is listed again by the 2nd-level aggregator.

    The directory of ``agg_2ndLevel_conn`` is polled once per second, for at
    most one reconnect interval (``cfg.LDMSD_RECONNECT_INTERVAL``) plus one
    second, until ``obj['instance']`` is listed. The seconds waited are logged.
    """
    timeout = microsec2sec(cfg.LDMSD_RECONNECT_INTERVAL) + 1
    passed_sec = 0
    inst_dir = ldms.LDMS_xprt_dir(agg_2ndLevel_conn)
    assert inst_dir is not None
    # Poll on the asserted condition; a non-empty directory alone does not
    # mean the instance under test has been re-added yet.
    while obj['instance'] not in inst_dir and passed_sec < timeout:
        sleep(1)
        passed_sec += 1
        inst_dir = ldms.LDMS_xprt_dir(agg_2ndLevel_conn)
        assert inst_dir is not None
    logger.info("Wait time: {0}".format(passed_sec))
    assert obj['instance'] in inst_dir
def test_agg_after_samplerd_die(self, logger, cfg, obj, agg_conn):
    """Check that the test instance disappears from the aggregator.

    The directory of ``agg_conn`` is polled once per second, for at most one
    update interval, until ``obj['instance']`` is no longer listed. The
    seconds waited are logged.
    """
    # The latest the aggregator recognizes that the samplerd is gone
    # would be at the update time.
    timeout = microsec2sec(cfg.LDMSD_UPDATE_INTERVAL)
    passed_sec = 0
    inst_dir = ldms.LDMS_xprt_dir(agg_conn)
    assert inst_dir is not None
    # Wait for this instance to vanish, not for the directory to be empty:
    # other sets staying listed must not consume the whole timeout.
    while obj['instance'] in inst_dir and passed_sec < timeout:
        sleep(1)
        passed_sec += 1
        inst_dir = ldms.LDMS_xprt_dir(agg_conn)
        assert inst_dir is not None
    logger.info("Wait time: {0}".format(passed_sec))
    assert obj['instance'] not in inst_dir
def test_agg_after_samplerd_revived(self, logger, obj, cfg, agg_conn):
    """Check that the test instance is listed again by the aggregator.

    The directory of ``agg_conn`` is polled once per second until
    ``obj['instance']`` is listed or the timeout expires. The seconds waited
    are logged.
    """
    # The default reconnect time is 20 seconds. There is no
    # way to change this from the static configuration.
    timeout = 20 + 2 * microsec2sec(cfg.LDMSD_UPDATE_INTERVAL)
    passed_sec = 0
    inst_dir = ldms.LDMS_xprt_dir(agg_conn)
    assert inst_dir is not None
    # Poll on the asserted condition; a non-empty directory does not imply
    # that the instance under test is back.
    while obj['instance'] not in inst_dir and passed_sec < timeout:
        sleep(1)
        passed_sec += 1
        inst_dir = ldms.LDMS_xprt_dir(agg_conn)
        assert inst_dir is not None
    logger.info("Wait time: {0}".format(passed_sec))
    assert obj['instance'] in inst_dir
def test_samplerd_instance_existence(self, logger, cfg, obj, samplerd_conn):
    """Check the sampler daemon's directory listing.

    The listing obtained over ``samplerd_conn`` must exist, contain exactly
    ``cfg.NUM_TEST_INSTANCES_PER_HOST`` entries and include
    ``obj['instance']``. Debug messages bracket the directory call.
    """
    logger.debug("--- before ldms_xprt_dir")
    listing = ldms.LDMS_xprt_dir(samplerd_conn)
    logger.debug("--- after ldms_xprt_dir")
    assert listing is not None
    assert len(listing) == cfg.NUM_TEST_INSTANCES_PER_HOST
    assert obj['instance'] in listing
def test_01_verify(self):
    """Verify data in the storage

    Connects to the sampler on ``self.SMP_PORT``, looks up every set it
    lists, samples all of them ten times (one second apart) and checks that
    every sampled tuple is present in the result file at
    ``self.STORE_PATH``. ``ResultFile.verify()`` is also run on that file.
    """
    x = ldms.LDMS_xprt_new(self.XPRT)
    rc = ldms.LDMS_xprt_connect_by_name(x, "localhost", self.SMP_PORT)
    if rc:
        log.error("rc: %d" % rc)
    assert rc == 0
    dlist = ldms.LDMS_xprt_dir(x)
    log.info("Looking up sets")
    _sets = []
    for name in dlist:
        s = ldms.LDMS_xprt_lookup(x, name, 0)
        assert s
        _sets.append(s)
    # The metric names below are read from one of the looked-up sets, so an
    # empty listing must fail here with a clear message instead of a
    # NameError further down.
    self.assertGreater(len(_sets), 0, "no LDMS sets found on the sampler")
    log.info("Collecting data from LDMS for comparison")
    data = set()
    for _ in range(0, 10):
        # update first
        for s in _sets:
            s.update()
        for s in _sets:
            data.add(ldms_set_as_tuple(s, with_ts=True))
        time.sleep(1)
    time.sleep(1)  # to make sure that the last data point has been stored
    log.info("Verifying...")
    rf = ResultFile(self.STORE_PATH)
    # Verify the computed results
    rf.verify()
    # Now, verify that the stored raw is good. The column names come from
    # the last set that was looked up (the set the original code reached
    # through its leaked loop variable).
    ref_set = _sets[-1]
    names = ["#Time"] + [ref_set.metric_name_get(k)
                         for k, _v in ref_set.iter_items()]
    csv_data = set(r.as_tuple(names) for r in rf)
    self.assertLessEqual(data, csv_data)
def test_env(self):
    """Connect with the auth configuration taken from the environment.

    ``LDMS_AUTH_FILE`` is set to ``self.AUTH_OPT["conf"]`` and the transport
    is created with no explicit auth options (``None``). The connection must
    succeed and the directory must be exactly ``["smp/meminfo"]``.
    """
    # Assigning through os.environ also calls putenv() for the process
    # environment and, unlike a bare os.putenv(), keeps os.environ in sync.
    os.environ["LDMS_AUTH_FILE"] = self.AUTH_OPT["conf"]
    xprt = ldms.LDMS_xprt_new_with_auth(self.XPRT, self.AUTH, None)
    rc = ldms.LDMS_xprt_connect_by_name(xprt, "localhost", self.PORT)
    self.assertEqual(rc, 0)
    _dir = ldms.LDMS_xprt_dir(xprt)
    self.assertEqual(_dir, ["smp/meminfo"])
def test_01_verify(self):
    """Verify data in the storage

    Connects to the sampler on ``self.SMP_PORT``, expects exactly one set,
    samples it ten times (one second apart) and checks that every sampled
    tuple is present in the CSV store ``test_store_csv/csv``.
    """
    x = ldms.LDMS_xprt_new(self.XPRT)
    rc = ldms.LDMS_xprt_connect_by_name(x, "localhost", self.SMP_PORT)
    if rc:
        log.error("rc: %d" % rc)
    assert rc == 0
    dlist = ldms.LDMS_xprt_dir(x)
    log.info("Looking up sets")
    self.assertEqual(len(dlist), 1)
    s = ldms.LDMS_xprt_lookup(x, dlist[0], 0)
    # Fail at the lookup instead of with an AttributeError on s.update().
    self.assertIsNotNone(s)
    log.info("Collecting data from LDMS for comparison")
    data = set()
    for _ in range(0, 10):
        # update first
        s.update()
        data.add(ldms_set_as_tuple(s))
        time.sleep(1)
    time.sleep(1)  # to make sure that the last data point has been stored
    log.info("Verifying...")
    csv_data = set(LdmsCsv("test_store_csv/csv"))
    self.assertLessEqual(data, csv_data)
def __verify(self, lvl, _id, failover=False, empty=False):
    """Compare the sets listed by ldmsd ``(lvl, _id)`` with the expected ones.

    The expected names are ``<level-0 producer>/meminfo`` and ``.../vmstat``
    for the ``2**lvl`` level-0 producers starting at ``2**lvl * _id``, or
    nothing when ``empty`` is true. With ``failover`` the same range for the
    partner ``_id ^ 1`` is added. The transport, return code and both sets
    are also stored on ``DEBUG``.
    """
    prdcr = LVX_prdcr(lvl, _id)
    log.info("Verifying %s" % prdcr)
    port = LVX_port(lvl, _id)
    x = ldms.LDMS_xprt_new(XPRT)
    rc = ldms.LDMS_xprt_connect_by_name(x, "localhost", str(port))
    DEBUG.x = x
    DEBUG.rc = rc
    assert rc == 0

    N = 2**lvl
    schemas = ["meminfo", "vmstat"]
    expected = set()
    if not empty:
        first = N * _id
        for i in range(first, first + N):
            for schema in schemas:
                expected.add(LVX_prdcr(0, i) + "/" + schema)
    if failover:
        # sets of the failover partner
        first = N * (_id ^ 1)
        for i in range(first, first + N):
            for schema in schemas:
                expected.add(LVX_prdcr(0, i) + "/" + schema)

    actual = set(ldms.LDMS_xprt_dir(x))
    DEBUG.s0 = expected
    DEBUG.s1 = actual
    msg = "ldmsd (%d, %d) verification failed, expecting %s, but got %s" % (
        lvl, _id, str(expected), str(actual))
    self.assertEqual(expected, actual, msg)
def test_cfg_good(self):
    """Start an aggregator from a config string and check what it lists.

    An ``LDMSD`` is started on port 10000 with a producer/updater pair
    pointing at ``localhost:self.SMP_PORT``. After a short pause the test
    connects to it and expects its directory to be ``["smp/meminfo"]``.
    """
    cfg = """\
        prdcr_add name=%(prdcr)s xprt=%(xprt)s host=%(host)s port=%(port)s \
                  type=active interval=1000000
        prdcr_start name=%(prdcr)s
        updtr_add name=%(updtr)s interval=1000000 offset=500000
        updtr_prdcr_add name=%(updtr)s regex=%(prdcr)s
        updtr_start name=%(updtr)s
    """ % {
        "prdcr": "prdcr",
        "updtr": "updtr",
        "xprt": "sock",
        "host": "localhost",
        "port": self.SMP_PORT,
    }
    daemon = LDMSD(port="10000", auth=self.AUTH,
                   auth_opt=self.LDMSD_AUTH_OPT, cfg=cfg)
    daemon.run()
    try:
        time.sleep(0.5)
        xprt = ldms.LDMS_xprt_new_with_auth(self.XPRT, self.AUTH,
                                            self.LDMSD_AUTH_OPT)
        rc = ldms.LDMS_xprt_connect_by_name(xprt, "localhost", "10000")
        assert rc == 0
        dir_resp = ldms.LDMS_xprt_dir(xprt)
    finally:
        # Always stop the daemon, even when connecting or listing fails, so
        # it does not linger and keep the port busy.
        daemon.term()
    self.assertEqual(dir_resp, ["smp/meminfo"])
def test_1stLevel_instance_after_samplerd_revived(self, logger, cfg, obj, agg_1stLevel_conn):
    """Check that the test instance is listed again by the 1st-level aggregator.

    The directory of ``agg_1stLevel_conn`` is polled once per second until
    ``obj['instance']`` is listed or the timeout expires. The seconds waited
    are logged.
    """
    # Wait time must be at least the reconnect interval;
    # + 1 second is a time buffer to do dir and lookup.
    timeout = microsec2sec(cfg.LDMSD_RECONNECT_INTERVAL) + 1
    passed_sec = 0
    inst_dir = ldms.LDMS_xprt_dir(agg_1stLevel_conn)
    assert inst_dir is not None
    # Poll on the asserted condition; a non-empty directory does not imply
    # that the instance under test has been re-added.
    while obj['instance'] not in inst_dir and passed_sec < timeout:
        sleep(1)
        passed_sec += 1
        inst_dir = ldms.LDMS_xprt_dir(agg_1stLevel_conn)
        assert inst_dir is not None
    logger.info("Wait time: {0}".format(passed_sec))
    assert obj['instance'] in inst_dir
def test_01_verify(self):
    """Verify data in the storage

    Connects to the sampler on ``self.SMP_PORT``, expects exactly one set,
    samples it ten times (one second apart) as dicts and checks that every
    sample, converted to a tuple, is also present in ``self.amqp_sink.data``.
    All records on both sides must have the same keys as the first sample.
    """
    conn = ldms.LDMS_xprt_new(self.XPRT)
    status = ldms.LDMS_xprt_connect_by_name(conn, "localhost", self.SMP_PORT)
    if status:
        log.error("rc: %d" % status)
    assert status == 0

    listing = ldms.LDMS_xprt_dir(conn)
    self.assertEqual(len(listing), 1)
    log.info("Looking up sets")
    the_set = ldms.LDMS_xprt_lookup(conn, listing[0], 0)
    assert the_set

    log.info("Collecting data from LDMS for comparison")
    samples = []
    for _ in range(0, 10):
        # refresh the set before taking the snapshot
        the_set.update()
        samples.append(ldms_set_as_dict(the_set))
        time.sleep(1)
    time.sleep(1)  # to make sure that the last data point has been stored

    log.info("Verifying...")
    keys = samples[0].keys()
    key_set = set(keys)
    for rec in samples:
        self.assertEqual(key_set, set(rec.keys()))
    for rec in self.amqp_sink.data:
        self.assertEqual(key_set, set(rec.keys()))

    ldms_tuples = set(tuple_from_dict(rec, keys) for rec in samples)
    amqp_tuples = set(tuple_from_dict(rec, keys) for rec in self.amqp_sink.data)
    self.assertGreater(len(ldms_tuples), 0)
    self.assertLessEqual(ldms_tuples, amqp_tuples)
def test_dir_owner(self):
    """Connect with ``self.AUTH_OPT`` and expect exactly ``smp/meminfo``."""
    conn = ldms.LDMS_xprt_new_with_auth(self.XPRT, self.AUTH, self.AUTH_OPT)
    status = ldms.LDMS_xprt_connect_by_name(conn, "localhost", self.PORT)
    self.assertEqual(status, 0)
    listing = ldms.LDMS_xprt_dir(conn)
    self.assertEqual(listing, ["smp/meminfo"])
def test_00_verify_cfg(self):
    """Verify sampler config, cmd-expand only env command

    The daemon on port 10001 must list a single set named
    ``<hostname>/$(whoami)/meminfo``; the ``$(whoami)`` part is expected
    literally in the name.
    """
    expected = [socket.gethostname() + "/$(whoami)/meminfo"]
    conn = ldms.LDMS_xprt_new(self.XPRT)
    status = ldms.LDMS_xprt_connect_by_name(conn, "localhost", "10001")
    self.assertEqual(status, 0)
    listing = ldms.LDMS_xprt_dir(conn)
    self.assertEqual(listing, expected)
def test_2ndLevel_instance_after_samplerd_died(self, logger, cfg, obj, agg_2ndLevel_conn):
    """Check that the test instance is gone from the 2nd-level aggregator.

    Sleeps two seconds, then requires that the directory of
    ``agg_2ndLevel_conn`` exists and does not list ``obj['instance']``.
    """
    # Author's note: since dir_update is activated, as soon as the local set
    # on the 1st aggregator is deleted, the 2nd-level aggregator should know
    # right away.
    sleep(2)
    listing = ldms.LDMS_xprt_dir(agg_2ndLevel_conn)
    assert listing is not None
    assert obj['instance'] not in listing
def test_1stLevel_instance_after_samplerd_died(self, logger, cfg, obj, agg_1stLevel_conn):
    """Check that the test instance is gone from the 1st-level aggregator.

    Sleeps one second, then requires that the directory of
    ``agg_1stLevel_conn`` exists and does not list ``obj['instance']``.
    """
    # Author's note: the aggregator should know right away that the set is
    # gone because the set is reset when ldmsd receives a DISCONNECTED event.
    sleep(1)
    listing = ldms.LDMS_xprt_dir(agg_1stLevel_conn)
    assert listing is not None
    assert obj['instance'] not in listing
def test_2ndLevel_instance_after_samplerd_revived(self, logger, cfg, obj, agg_2ndLevel_conn):
    """Check that the test instance is listed again by the 2nd-level aggregator.

    The directory of ``agg_2ndLevel_conn`` is polled once per second, for at
    most two seconds, until ``obj['instance']`` is listed. The seconds waited
    are logged.
    """
    # Author's note: as soon as the local set on the 1st-level aggregator is
    # created, a dir_update is sent to the 2nd-level aggregator for the newly
    # added set, so the timeout has nothing to do with the reconnect/update
    # intervals.
    timeout = 2
    passed_sec = 0
    inst_dir = ldms.LDMS_xprt_dir(agg_2ndLevel_conn)
    assert inst_dir is not None
    # Poll on the asserted condition; a non-empty directory does not imply
    # that the instance under test is present.
    while obj['instance'] not in inst_dir and passed_sec < timeout:
        sleep(1)
        passed_sec += 1
        inst_dir = ldms.LDMS_xprt_dir(agg_2ndLevel_conn)
        assert inst_dir is not None
    logger.info("Wait time: {0}".format(passed_sec))
    assert obj['instance'] in inst_dir
def test_dir_owner(self):
    """Connect with ``self.AUTH_OPT`` and expect to see all of ``self.SETS``.

    Raises:
        RuntimeError: if the connection cannot be established.
    """
    xprt = ldms.LDMS_xprt_new_with_auth(self.XPRT, self.AUTH, self.AUTH_OPT)
    rc = ldms.LDMS_xprt_connect_by_name(xprt, "localhost", self.PORT)
    if rc:
        raise RuntimeError("LDMS connect failed: %d" % rc)
    try:
        _dir = ldms.LDMS_xprt_dir(xprt)
        self.assertEqual(set(_dir), set(self.SETS))
    finally:
        # Close the transport even when the listing or the assertion fails.
        ldms.ldms_xprt_close(xprt)
def _verify(self, xprt, job=None, clk=None, component_id=0, job_id="job_id", app_id="app_id", job_start="job_start", job_end="job_end"):
    """Verify the sets visible over ``xprt`` and the contents of the clock set.

    The directory must list exactly the given ``job`` and ``clk`` set names
    (those that are truthy). When ``clk`` is given, three snapshots are taken
    one second apart and checked for ``component_id`` and ``tv_sec``. When
    ``job`` is given as well, the ``job_id``/``app_id`` values in the clock
    set are compared with the job set's metrics named by the ``job_id`` and
    ``app_id`` arguments. ``job_start`` and ``job_end`` are accepted but not
    used here.
    """
    wanted = set(name for name in [job, clk] if name)
    listing = ldms.LDMS_xprt_dir(xprt)
    self.assertEqual(set(listing), wanted)
    if not clk:
        return

    # snapshots of jobset and clkset
    jsnap = []
    csnap = []
    jobset = ldms.LDMS_xprt_lookup(xprt, job, 0) if job else None
    clkset = ldms.LDMS_xprt_lookup(xprt, clk, 0)
    for round_no in range(0, 3):
        if round_no > 0:
            time.sleep(1)  # skip the 1st sleep
        clkset.update()  # blocking-update
        if job:
            jobset.update()  # blocking-update
            jsnap.append(ldms_set_as_dict(jobset))
        csnap.append(ldms_set_as_dict(clkset))

    # verify component_id
    seen_comp_ids = set(snap["component_id"] for snap in csnap)
    self.assertEqual(set([component_id]), seen_comp_ids)

    # verify clk["tv_sec"] updates
    stamps = sorted(set(snap["tv_sec"] for snap in csnap))
    self.assertEqual(2, len(stamps))
    self.assertEqual(2, stamps[1] - stamps[0])

    # check job update (if applicable)
    if not job:
        return
    for clk_rec, job_rec in zip(csnap, jsnap):
        self.assertNotEqual(0, clk_rec["app_id"])
        self.assertNotEqual(0, clk_rec["job_id"])
        self.assertLessEqual(abs(clk_rec["job_id"] - job_rec[job_id]), 1)
        self.assertLessEqual(abs(clk_rec["app_id"] - job_rec[app_id]), 1)

    # distinct job_id / app_id values must each advance in steps of one
    for key in ("job_id", "app_id"):
        values = sorted(set(snap[key] for snap in csnap))
        steps = adiff(values)
        self.assertGreater(len(steps), 0)
        self.assertEqual(steps, [1] * len(steps))
def test_dir_other(self):
    """Connect as uid/gid 5555 and expect only the sets readable by others.

    The expected sets are those in ``self.SETS`` whose entry in
    ``self.PERMS`` matches the pattern ``0..6``.

    Raises:
        RuntimeError: if the connection cannot be established.
    """
    auth_opt = {"uid": "5555", "gid": "5555"}
    xprt = ldms.LDMS_xprt_new_with_auth(self.XPRT, self.AUTH, auth_opt)
    rc = ldms.LDMS_xprt_connect_by_name(xprt, "localhost", self.PORT)
    if rc:
        raise RuntimeError("LDMS connect failed: %d" % rc)
    try:
        r = re.compile("0..6")
        _sets = [_s for _s, _p in zip(self.SETS, self.PERMS) if r.match(_p)]
        _dir = ldms.LDMS_xprt_dir(xprt)
        self.assertEqual(set(_dir), set(_sets))
    finally:
        # Close the transport even when the listing or the assertion fails.
        ldms.ldms_xprt_close(xprt)
def __init__(self, port, xprt="sock", hostname="localhost"):
    """Connect to an LDMS daemon and look up every set it lists.

    After construction ``self.sets`` holds the looked-up sets in listing
    order and ``self._dict`` maps each listed name to its set. The
    connection result is checked with an ``assert``.
    """
    self.xprt = ldms.LDMS_xprt_new(xprt)
    status = ldms.LDMS_xprt_connect_by_name(self.xprt, hostname, port)
    assert status == 0
    self.sets = []
    self._dict = {}
    for inst_name in ldms.LDMS_xprt_dir(self.xprt):
        handle = ldms.LDMS_xprt_lookup(self.xprt, inst_name,
                                       ldms.LDMS_LOOKUP_BY_INSTANCE)
        self.sets.append(handle)
        self._dict[inst_name] = handle
def test_000(self):
    """Verify that the agg collects from both smp0 and smp1

    The aggregator on ``self.AGG_PORT_BASE`` must list ``smp<i>/meminfo`` for
    every ``i`` below ``self.SMP_NUM``. Each set is updated and must carry a
    ``component_id`` equal to ``i`` and a timestamp with ``sec > 0``.
    """
    x = ldms.LDMS_xprt_new(self.XPRT)
    rc = ldms.LDMS_xprt_connect_by_name(x, "localhost",
                                        str(self.AGG_PORT_BASE))
    self.assertEqual(rc, 0)
    dirs = ldms.LDMS_xprt_dir(x)
    self.assertEqual(
        set(dirs),
        set(["smp%d/meminfo" % i for i in range(0, self.SMP_NUM)]))
    sets = {d: ldms.LDMS_xprt_lookup(x, d, 0) for d in dirs}
    # items() exists on both Python 2 and Python 3 dicts; iteritems() is
    # Python 2 only.
    for k, s in sets.items():
        s.update()
        grp = re.match(r"smp(\d+)/meminfo", k).groups()
        comp_id = int(grp[0])
        self.assertEqual(s['component_id'], comp_id)
        ts = s.ts_get()
        self.assertGreater(ts.sec, 0)
def test_01_req_noexp(self):
    """Request over xprt shall not be command-expanded

    Configuration commands for a ``vmstat`` sampler are sent through an
    ``LDMSD_Controller`` with ``X`` set to the literal text ``$(hostname)``.
    The daemon on port 10001 must then list ``$(hostname)/vmstat`` verbatim
    next to ``<hostname>/$(whoami)/meminfo``.
    """
    ctrl = LDMSD_Controller(port=self.SMP_PORT, xprt=self.XPRT)
    ctrl.run()
    ctrl.read_pty()
    ctrl.write_pty("env X=$(hostname)\n")
    ctrl.write_pty("load name=vmstat\n")
    ctrl.write_pty("config name=vmstat producer=${X} \
                    instance=${X}/vmstat\
                    schema=vmstat\n")
    ctrl.write_pty("start name=vmstat interval=1000000 offset=0\n")
    time.sleep(0.2)

    wanted = sorted([socket.gethostname() + "/$(whoami)/meminfo",
                     "$(hostname)/vmstat"])
    conn = ldms.LDMS_xprt_new(self.XPRT)
    status = ldms.LDMS_xprt_connect_by_name(conn, "localhost", "10001")
    self.assertEqual(status, 0)
    listing = ldms.LDMS_xprt_dir(conn)
    listing.sort()
    self.assertEqual(listing, wanted)
def test_samplerd_after_1st_agg_died(self, logger, cfg, obj, samplerd_conn):
    """Check that the sampler daemon still lists ``obj['instance']``."""
    listing = ldms.LDMS_xprt_dir(samplerd_conn)
    assert listing is not None
    assert obj['instance'] in listing
def _get_sets(self, port):
    """Connect to ``localhost:port`` and walk the daemon's directory.

    NOTE(review): the connect return code is not checked, nothing is done
    with the listed names and nothing is returned -- this looks like an
    unfinished helper; confirm the intended behavior before relying on it.
    """
    conn = ldms.LDMS_xprt_new(self.XPRT)
    rc = ldms.LDMS_xprt_connect_by_name(conn, "localhost", port)
    listing = ldms.LDMS_xprt_dir(conn)
    for _name in listing:
        continue
def test_samplerd_instance_existence(self, logger, cfg, obj, samplerd_conn):
    """Check the sampler daemon's directory listing.

    The listing obtained over ``samplerd_conn`` must exist, contain exactly
    ``cfg.NUM_TEST_INSTANCES_PER_HOST`` entries and include
    ``obj['instance']``.
    """
    listing = ldms.LDMS_xprt_dir(samplerd_conn)
    assert listing is not None
    assert len(listing) == cfg.NUM_TEST_INSTANCES_PER_HOST
    assert obj['instance'] in listing
def test_2ndLevel_isntance_after_1st_agg_died(self, logger, cfg, obj, agg_2ndLevel_conn):
    """Check that the test instance is gone from the 2nd-level aggregator.

    Sleeps one second, then requires that the directory of
    ``agg_2ndLevel_conn`` exists and does not list ``obj['instance']``.
    """
    sleep(1)
    listing = ldms.LDMS_xprt_dir(agg_2ndLevel_conn)
    assert listing is not None
    assert obj['instance'] not in listing
def setUpClass(cls):
    """Start the sampler daemons and the aggregator used by TestLDMSDPerm.

    One ``LDMSD`` sampler is started per entry of ``cls.PRDCRS`` and
    appended to ``cls.prdcrs``; the aggregator is stored in ``cls.agg``.
    Each producer/updater pair is then configured on the aggregator over a
    separate in-band connection that uses that entry's own credentials.
    Finally the aggregator's directory is compared with the expected
    ``<prdcr>/meminfo`` names.

    Raises:
        RuntimeError: if a config command returns a non-zero errcode, the
            verification connection fails, or the listed sets do not match.
    """
    # Need 3 ldmsd .. the config objects are for aggregators
    log.info("Setting up TestLDMSDPerm")
    try:
        # samplers (producers)
        for prdcr in cls.PRDCRS:
            smp_cfg = """
                load name=meminfo
                config name=meminfo producer=%(prdcr)s \
                       instance=%(prdcr)s/meminfo schema=meminfo
                start name=meminfo interval=1000000 offset=0
            """ % prdcr
            log.debug("smp_cfg: %s" % smp_cfg)
            ldmsd = LDMSD(port=prdcr["port"], xprt=cls.XPRT,
                          auth=cls.AUTH, auth_opt=cls.LDMSD_AUTH_OPT,
                          cfg=smp_cfg,
                          logfile=prdcr["logfile"])
            log.info("starting %s" % prdcr["prdcr"])
            ldmsd.run()
            cls.prdcrs.append(ldmsd)

        # aggregator
        cls.agg = LDMSD(port=cls.AGG_PORT, xprt=cls.XPRT,
                        auth=cls.AUTH, auth_opt=cls.LDMSD_AUTH_OPT,
                        logfile=cls.AGG_LOG)
        log.info("starting aggregator")
        cls.agg.run()
        time.sleep(1)

        # need to config separately so that prdcr,updtr pairs are owned by
        # different users.
        log.info("configuring aggregator")
        for prdcr in cls.PRDCRS:
            log.info("....adding %(prdcr)s" % prdcr)
            agg_cfg = """\
                prdcr_add name=%(prdcr)s xprt=%(xprt)s host=localhost \
                          port=%(port)s type=active interval=1000000 \
                          perm=0600
                prdcr_start name=%(prdcr)s
                updtr_add name=%(updtr)s interval=1000000 offset=500000 \
                          perm=0600
                updtr_prdcr_add name=%(updtr)s regex=%(prdcr)s
                updtr_start name=%(updtr)s
            """ % prdcr
            log.debug("agg_cfg: %s" % agg_cfg)
            ctrl = ldmsdInbandConfig(host="localhost", port=cls.AGG_PORT,
                                     xprt=prdcr["xprt"],
                                     auth=prdcr["auth"],
                                     auth_opt=prdcr["auth_opt"])
            try:
                for cmd in agg_cfg.splitlines():
                    cmd = cmd.strip()
                    if not cmd:
                        continue
                    log.debug("cmd: %s" % cmd)
                    req = LDMSD_Request.from_str(cmd)
                    req.send(ctrl)
                    resp = req.receive(ctrl)
                    errcode = resp["errcode"]
                    if errcode:
                        raise RuntimeError("LDMSD Ctrl errcode: %d" % errcode)
            finally:
                # Release the control connection even when a command fails.
                ctrl.close()
        time.sleep(1)

        # Verify that the agg is working as configured
        log.info("verifying aggregator")
        xprt = ldms.LDMS_xprt_new_with_auth(cls.XPRT, cls.AUTH,
                                            cls.LDMSD_AUTH_OPT)
        rc = ldms.LDMS_xprt_connect_by_name(xprt, "localhost", cls.AGG_PORT)
        if rc:
            raise RuntimeError("LDMS connect failed: %d" % rc)
        try:
            _dir = ldms.LDMS_xprt_dir(xprt)
            log.debug("dirs: %s" % str(_dir))
        finally:
            ldms.ldms_xprt_close(xprt)
        _edirs = [p["prdcr"] + "/meminfo" for p in cls.PRDCRS]
        if set(_dir) != set(_edirs):
            raise RuntimeError("Bad set ...")
    except:
        # Bare except is deliberate: the references to the daemons are
        # dropped on any failure and the error is re-raised unchanged.
        # Only attributes that exist on this class are removed, so a
        # failure before ``cls.agg`` was assigned cannot raise a new
        # AttributeError here and hide the original error.
        for attr in ("agg", "prdcrs"):
            if attr in vars(cls):
                delattr(cls, attr)
        raise
    log.info("TestLDMSDPerm set up done")