def test_1stLevel_agg_die(self, logger, cfg, obj):
    """Take down the first-level aggregator and check its neighbours survive.

    The aggregator is asked to terminate, force-killed if it is still
    reported as running, and its socket file is removed.  The test then
    asserts that the aggregator is gone and, after a short pause, that
    both the samplerd and the second-level aggregator are still running.
    """
    agg_host = obj['agg_1stLevel_host']
    samplerd_host = obj['samplerd_host']
    agg2_host = obj['agg_2ndLevel_host']

    # Polite kill first; escalate to kill -9 only if the daemon survived.
    kill_ldmsd(hosts=[agg_host], xprt=cfg.AGG_XPRT, port=cfg.AGG_PORT)
    agg_state = is_ldmsd_running(hosts=[agg_host],
                                 xprt=cfg.AGG_XPRT, port=cfg.AGG_PORT)
    if agg_state[agg_host]:
        kill_9_ldmsd(hosts=[agg_host], xprt=cfg.AGG_XPRT, port=cfg.AGG_PORT)
    remove_file(hosts=[agg_host], filepath=cfg.AGG_SOCK)

    # The first-level aggregator must really be gone by now.
    agg_state = is_ldmsd_running(hosts=[agg_host],
                                 xprt=cfg.AGG_XPRT, port=cfg.AGG_PORT)
    assert not agg_state[agg_host]

    # Give the remaining daemons a moment before checking on them.
    sleep(3)

    samplerd_state = is_ldmsd_running(hosts=[samplerd_host],
                                      xprt=cfg.SAMPLERD_XPRT,
                                      port=cfg.SAMPLERD_PORT)
    assert samplerd_state[samplerd_host]

    agg2_state = is_ldmsd_running(hosts=[agg2_host],
                                  xprt=cfg.AGG2_XPRT, port=cfg.AGG2_PORT)
    assert agg2_state[agg2_host]
def test_samplerd_die(self, logger, cfg, obj):
    """Take down the samplerd and check both aggregator levels survive.

    The samplerd is asked to terminate, force-killed if it is still
    reported as running, and its socket file is removed.  After a short
    pause the first- and second-level aggregators must still be running.
    """
    samplerd_host = obj['samplerd_host']

    # Always hand the helpers a list of hosts, as the other callers in
    # this file do; a bare string here was inconsistent with the
    # remove_file() and aggregator checks below.
    kill_ldmsd(hosts=[samplerd_host],
               xprt=cfg.SAMPLERD_XPRT, port=cfg.SAMPLERD_PORT)
    is_running = is_ldmsd_running(hosts=[samplerd_host],
                                  xprt=cfg.SAMPLERD_XPRT,
                                  port=cfg.SAMPLERD_PORT)
    if is_running[samplerd_host]:
        kill_9_ldmsd(hosts=[samplerd_host],
                     xprt=cfg.SAMPLERD_XPRT, port=cfg.SAMPLERD_PORT)
    remove_file(hosts=[samplerd_host], filepath=cfg.SAMPLERD_SOCK)

    # Sleep to make sure that the aggregators do not crash.
    # If the test doesn't sleep, the aggregators could die after
    # the check point.
    sleep(3)

    agg_host = obj['agg_1stLevel_host']
    is_agg_running = is_ldmsd_running(hosts=[agg_host],
                                      xprt=cfg.AGG_XPRT, port=cfg.AGG_PORT)
    assert is_agg_running[agg_host]

    agg2_host = obj['agg_2ndLevel_host']
    is_agg_running = is_ldmsd_running(hosts=[agg2_host],
                                      xprt=cfg.AGG2_XPRT, port=cfg.AGG2_PORT)
    assert is_agg_running[agg2_host]
def stop_test_ldmsds(hosts, xprt, port, sock, log):
    """Stop the ldmsd daemons on ``hosts`` and remove their socket file.

    The daemons are first asked to terminate with ``kill_ldmsd``.  Any
    host that ``is_ldmsd_running`` still reports as running is then
    force-killed with ``kill_9_ldmsd``.  Finally ``sock`` is removed on
    every host in ``hosts``.

    :param hosts: hosts whose ldmsd should be stopped
    :param xprt: transport the daemons listen on
    :param port: port the daemons listen on
    :param sock: path of the socket file to remove on each host
    :param log: logger used for the debug message about forced kills
    """
    kill_ldmsd(hosts=hosts, xprt=xprt, port=port)
    is_running = is_ldmsd_running(hosts, xprt, port)

    # Build a real list: on Python 3 filter() returns an iterator, which
    # has no len() and would be exhausted by the join below.
    not_died = [host for host in is_running if is_running[host]]
    if not_died:
        # This helper is used for samplerds and aggregators alike, so the
        # message does not name a specific daemon role.
        log.debug("---- kill 9 ldmsd: {0}".format(",".join(not_died)))
        kill_9_ldmsd(hosts=not_died, xprt=xprt, port=port)

    remove_file(hosts=hosts, filepath=sock)
def test_samplerd_die(self, logger, cfg, obj):
    """Take down the samplerd and check the aggregator survives.

    The samplerd is asked to terminate, force-killed if it is still
    reported as running, and its socket file is removed.  After a short
    pause the aggregator must still be running.
    """
    samplerd_host = obj['samplerd_host']
    agg_host = obj['agg_host']

    # Pass one-element host lists, matching how the other callers in this
    # file invoke the same helpers.
    # NOTE(review): assumes the helpers key their result by host name when
    # given a list, as the list-based callers rely on -- confirm.
    kill_ldmsd(hosts=[samplerd_host],
               xprt=cfg.SAMPLERD_XPRT, port=cfg.SAMPLERD_PORT)
    is_running = is_ldmsd_running(hosts=[samplerd_host],
                                  xprt=cfg.SAMPLERD_XPRT,
                                  port=cfg.SAMPLERD_PORT)
    if is_running[samplerd_host]:
        kill_9_ldmsd(hosts=[samplerd_host],
                     xprt=cfg.SAMPLERD_XPRT, port=cfg.SAMPLERD_PORT)
    remove_file(hosts=[samplerd_host], filepath=cfg.SAMPLERD_SOCK)

    # Pause before checking, so a delayed aggregator crash is not missed.
    sleep(3)

    is_agg_running = is_ldmsd_running(hosts=[agg_host],
                                      xprt=cfg.AGG_XPRT, port=cfg.AGG_PORT)
    assert is_agg_running[agg_host]
def startup_ldmsd_processes(request, logger, cfg):
    """Start the samplerd and aggregator daemons and register their teardown.

    Stale log and socket files are removed first, then the samplerds and
    the aggregators are started.  If anything in that sequence raises,
    the daemons are stopped immediately and the exception is re-raised.
    On success the stop routine is registered with ``request.addfinalizer``.

    :param request: object providing ``addfinalizer`` for teardown
    :param logger: logger used for progress messages
    :param cfg: configuration object holding hosts, transports, ports and
        file paths for the samplerds and aggregators
    """
    logger.debug("------------ starting ldmsd processes")
    sleep(1)

    def stop_ldmsd_processes():
        """Stop the samplerds and the aggregators and remove their sockets."""
        logger.debug("------------ stopping ldmsd processes")
        stop_test_ldmsds(cfg.SAMPLERD_HOSTS, cfg.SAMPLERD_XPRT,
                         cfg.SAMPLERD_PORT, cfg.SAMPLERD_SOCK, logger)
        # The aggregators have their own socket file; passing the samplerd
        # socket here left cfg.AGG_SOCK behind after teardown.
        stop_test_ldmsds(cfg.AGG_HOSTS, cfg.AGG_XPRT,
                         cfg.AGG_PORT, cfg.AGG_SOCK, logger)
        logger.debug("------------ stopping ... DONE")

    try:
        # Clear leftovers from a previous run.
        remove_file(hosts=cfg.SAMPLERD_HOSTS, filepath=cfg.SAMPLERD_LOG)
        remove_file(hosts=cfg.SAMPLERD_HOSTS, filepath=cfg.SAMPLERD_SOCK)
        remove_file(hosts=cfg.AGG_HOSTS, filepath=cfg.AGG_LOG)
        remove_file(hosts=cfg.AGG_HOSTS, filepath=cfg.AGG_SOCK)

        start_test_ldmsds(hosts=cfg.SAMPLERD_HOSTS,
                          xprt=cfg.SAMPLERD_XPRT,
                          port=cfg.SAMPLERD_PORT,
                          log=cfg.SAMPLERD_LOG,
                          sock=cfg.SAMPLERD_SOCK,
                          test_set_name=cfg.TEST_INSTANCE_PREFIX_NAME,
                          test_set_count=cfg.NUM_TEST_INSTANCES_PER_HOST,
                          test_metric_count=cfg.TEST_INSTANCE_NUM_METRICS,
                          inet_ctrl_port=cfg.SAMPLERD_INET_CTRL_PORT,
                          verbose=cfg.SAMPLERD_VERBOSE)
        start_test_ldmsds(hosts=cfg.AGG_HOSTS,
                          xprt=cfg.AGG_XPRT,
                          port=cfg.AGG_PORT,
                          log=cfg.AGG_LOG,
                          sock=cfg.AGG_SOCK,
                          inet_ctrl_port=cfg.AGG_INET_CTRL_PORT,
                          verbose=cfg.AGG_VERBOSE)
        logger.debug("------------ starting ... DONE")
    except:
        # Deliberately bare: clean up on any failure (including an
        # interrupt), then let the original exception propagate.
        stop_ldmsd_processes()
        raise

    request.addfinalizer(stop_ldmsd_processes)
def _build_arg_parser():
    """Create the command-line parser used by main()."""
    parser = ArgumentParser()
    parser.add_argument('--config-file', required=True,
                        help="Path to the config file")
    parser.add_argument('--start-samplerd', action="store_true",
                        help="start samplerd ldmsd")
    parser.add_argument('--kill-samplerd', action="store_true",
                        help="kill samplerd ldmsd")
    parser.add_argument('--start-agg', action="store_true",
                        help="start agg ldmsd")
    parser.add_argument('--kill-agg', action="store_true",
                        help="kill agg ldmsd")
    parser.add_argument('--kill-9-samplerd', action="store_true",
                        help="kill 9 samplerd ldmsd")
    parser.add_argument('--kill-9-agg', action="store_true",
                        help="kill 9 agg ldmsd")
    parser.add_argument('--remove-samplerd-files', action="store_true",
                        help="Remove the samplerd's log and sock files")
    parser.add_argument('--remove-agg-files', action="store_true",
                        help="Remove the aggregator(s)'s log and sock files")
    parser.add_argument('--check-samplerd', action="store_true",
                        help="Check samplerd ldmsd running")
    parser.add_argument('--check-agg', action="store_true",
                        help="Check agg ldmsd running")
    parser.add_argument('--samplerd-pid', action="store_true",
                        help="Get the samplerd PIDs")
    parser.add_argument('--agg-pid', action="store_true",
                        help="Get the agg PIDs")
    return parser


def main(argv=None):
    """Command-line entry point for starting, stopping and checking ldmsd.

    The process command line (``sys.argv[1:]``) is always parsed; when
    ``argv`` is given, its items are parsed after the command-line ones.
    ``sys.argv`` itself is left untouched, so repeated calls do not
    accumulate arguments.

    :param argv: optional extra command-line arguments
    :returns: 0 if interrupted from the keyboard, 2 if any other
        exception was raised (its traceback is printed), otherwise None
    """
    if argv is None:
        cli_args = sys.argv[1:]
    else:
        # Same effective argument list as extending sys.argv, but without
        # modifying the process-wide list.
        cli_args = sys.argv[1:] + list(argv)

    try:
        args = _build_arg_parser().parse_args(cli_args)
        cfg = get_var_from_file(module_name="cfg", filepath=args.config_file)

        if args.start_samplerd:
            print("start samplerd..")
            start_ldmsd(hosts=cfg.SAMPLERD_HOSTS, xprt=cfg.SAMPLERD_XPRT,
                        port=cfg.SAMPLERD_PORT, log=cfg.SAMPLERD_LOG,
                        sockname=cfg.SAMPLERD_SOCK)
        if args.start_agg:
            print("start agg..")
            start_ldmsd(hosts=cfg.AGG_HOSTS, xprt=cfg.AGG_XPRT,
                        port=cfg.AGG_PORT, log=cfg.AGG_LOG,
                        sockname=cfg.AGG_SOCK)
        if args.kill_samplerd:
            print("kill samplerd..")
            kill_ldmsd(hosts=cfg.SAMPLERD_HOSTS, xprt=cfg.SAMPLERD_XPRT,
                       port=cfg.SAMPLERD_PORT)
        if args.kill_agg:
            print("kill agg..")
            kill_ldmsd(hosts=cfg.AGG_HOSTS, xprt=cfg.AGG_XPRT,
                       port=cfg.AGG_PORT)
        if args.kill_9_samplerd:
            print("kill 9 samplerd..")
            kill_9_ldmsd(hosts=cfg.SAMPLERD_HOSTS, xprt=cfg.SAMPLERD_XPRT,
                         port=cfg.SAMPLERD_PORT)
        if args.kill_9_agg:
            print("kill 9 agg..")
            kill_9_ldmsd(hosts=cfg.AGG_HOSTS, xprt=cfg.AGG_XPRT,
                         port=cfg.AGG_PORT)
        if args.remove_samplerd_files:
            print("Removing the files of samplerd")
            remove_file(cfg.SAMPLERD_HOSTS, cfg.SAMPLERD_LOG)
            remove_file(cfg.SAMPLERD_HOSTS, cfg.SAMPLERD_SOCK)
        if args.remove_agg_files:
            print("Removing the files of aggregators")
            remove_file(cfg.AGG_HOSTS, cfg.AGG_LOG)
            remove_file(cfg.AGG_HOSTS, cfg.AGG_SOCK)
        if args.check_samplerd:
            print("Check samplerd ldmsd running? ....")
            print(is_ldmsd_running(hosts=cfg.SAMPLERD_HOSTS,
                                   xprt=cfg.SAMPLERD_XPRT,
                                   port=cfg.SAMPLERD_PORT))
        if args.check_agg:
            print("Check agg ldmsd running? ....")
            print(is_ldmsd_running(hosts=cfg.AGG_HOSTS, xprt=cfg.AGG_XPRT,
                                   port=cfg.AGG_PORT))
        if args.samplerd_pid:
            print("Getting samplerd pid")
            print(get_ldmsd_pid(hosts=cfg.SAMPLERD_HOSTS,
                                xprt=cfg.SAMPLERD_XPRT,
                                port=cfg.SAMPLERD_PORT))
        if args.agg_pid:
            print("Getting agg pid")
            print(get_ldmsd_pid(hosts=cfg.AGG_HOSTS, xprt=cfg.AGG_XPRT,
                                port=cfg.AGG_PORT))
    except KeyboardInterrupt:
        return 0
    except Exception:
        traceback.print_exc()
        return 2