def run_client(graphmgr_addr, load):
    saved_cfg = None
    if load is not None:
        try:
            with open(load, 'rb') as cnf:
                saved_cfg = dill.load(cnf)
        except OSError:
            logger.exception(
                "ami-client: problem opening saved graph configuration file:")
            return 1
        except dill.UnpicklingError:
            logger.exception(
                "ami-client: problem parsing saved graph configuration file (%s):", load)
            return 1

    queue = mp.Queue()
    list_proc = mp.Process(target=run_main_window, args=(queue, graphmgr_addr, saved_cfg))
    list_proc.start()
    widget_procs = []

    while True:
        window_type, name, topic = queue.get()
        if window_type == 'exit':
            logger.info("received exit signal - exiting!")
            break
        logger.debug("opening new widget: %s %s %s", window_type, name, topic)
        proc = mp.Process(target=run_widget, args=(queue, window_type, name, topic, graphmgr_addr))
        proc.start()
        widget_procs.append(proc)
def start_ami(request, workerjson):
    try:
        from pytest_cov.embed import cleanup_on_sigterm
        cleanup_on_sigterm()
    except ImportError:
        pass

    parser = build_parser()
    args = parser.parse_args([
        "-n", "1", '--headless', '--tcp', '%s://%s' % (request.param, workerjson)
    ])

    queue = mp.Queue()
    ami = mp.Process(name='ami', target=run_ami, args=(args, queue))
    ami.start()

    try:
        host = "127.0.0.1"
        comm_addr = "tcp://%s:%d" % (host, BasePort + Ports.Comm)
        with GraphCommHandler(args.graph_name, comm_addr) as comm_handler:
            yield comm_handler
    except Exception as e:
        # let the fixture exit 'gracefully' if it fails
        print(e)
        yield None
    finally:
        queue.put(None)
        ami.join(1)
        # if ami still hasn't exited then kill it
        if ami.is_alive():
            ami.terminate()
            ami.join(1)

        if ami.exitcode == 0 or ami.exitcode == -signal.SIGTERM:
            return 0
        else:
            print('AMI exited with non-zero status code: %d' % ami.exitcode)
            return 1
def run_ami(args, queue=None):
    xtcdir = None
    ipcdir = None
    owns_ipcdir = True
    flags = {}

    if queue is None:
        queue = mp.Queue()

    if args.ipc:
        ipcdir = tempfile.mkdtemp()
        owns_ipcdir = True
    elif args.ipc_dir is not None:
        ipcdir = args.ipc_dir
        owns_ipcdir = False

    if ipcdir is None:
        host = "127.0.0.1"
        comm_addr = "tcp://%s:%d" % (host, args.port)
        graph_addr = "tcp://%s:%d" % (host, args.port+1)
        collector_addr = "tcp://%s:%d" % (host, args.port+2)
        globalcol_addr = "tcp://%s:%d" % (host, args.port+3)
        results_addr = "tcp://%s:%d" % (host, args.port+4)
        export_addr = "tcp://%s:%d" % (host, args.port+5)
        msg_addr = "tcp://%s:%d" % (host, args.port+6)
        info_addr = "tcp://%s:%d" % (host, args.port+7)
        view_addr = "tcp://%s:%d" % (host, args.port+8)
        profile_addr = "tcp://%s:%d" % (host, args.port+9)
    else:
        collector_addr = "ipc://%s/node_collector" % ipcdir
        globalcol_addr = "ipc://%s/collector" % ipcdir
        graph_addr = "ipc://%s/graph" % ipcdir
        comm_addr = "ipc://%s/comm" % ipcdir
        results_addr = "ipc://%s/results" % ipcdir
        export_addr = "ipc://%s/export" % ipcdir
        msg_addr = "ipc://%s/message" % ipcdir
        info_addr = "ipc://%s/info" % ipcdir
        view_addr = "ipc://%s/view" % ipcdir
        profile_addr = "ipc://%s/profile" % ipcdir

    procs = []
    client_proc = None

    log_handlers = [logging.StreamHandler()]
    if args.headless or args.console:
        console_fmt = logging.Formatter(LogConfig.BasicFormat)
        log_handlers[0].setFormatter(console_fmt)
    if args.log_file is not None:
        log_handlers.append(logging.FileHandler(args.log_file))
    log_level = getattr(logging, args.log_level.upper(), logging.INFO)
    logging.basicConfig(format=LogConfig.FullFormat, level=log_level, handlers=log_handlers)

    try:
        for flag in args.flags:
            try:
                key, value = flag.split('=')
                flags[key] = value
            except ValueError:
                logger.exception("Problem parsing data source flag %s", flag)

        if args.source is not None:
            src_url_match = re.match('(?P<prot>.*)://(?P<body>.*)', args.source)
            if src_url_match:
                src_cfg = src_url_match.groups()
            else:
                logger.critical("Invalid data source config string: %s", args.source)
                return 1
        else:
            src_cfg = None

        for i in range(args.num_workers):
            proc = mp.Process(
                name='worker%03d-n0' % i,
                target=functools.partial(_sys_exit, run_worker),
                args=(i, args.num_workers, args.heartbeat, src_cfg, collector_addr, graph_addr,
                      msg_addr, export_addr, flags, args.prometheus_dir, args.hutch)
            )
            proc.daemon = True
            proc.start()
            procs.append(proc)

        collector_proc = mp.Process(
            name='nodecol-n0',
            target=functools.partial(_sys_exit, run_node_collector),
            args=(0, args.num_workers, collector_addr, globalcol_addr, graph_addr, msg_addr,
                  args.prometheus_dir, args.hutch)
        )
        collector_proc.daemon = True
        collector_proc.start()
        procs.append(collector_proc)

        globalcol_proc = mp.Process(
            name='globalcol',
            target=functools.partial(_sys_exit, run_global_collector),
            args=(0, 1, globalcol_addr, results_addr, graph_addr, msg_addr,
                  args.prometheus_dir, args.hutch)
        )
        globalcol_proc.daemon = True
        globalcol_proc.start()
        procs.append(globalcol_proc)

        manager_proc = mp.Process(
            name='manager',
            target=functools.partial(_sys_exit, run_manager),
            args=(args.num_workers, 1, results_addr, graph_addr, comm_addr, msg_addr, info_addr,
                  export_addr, view_addr, profile_addr, args.prometheus_dir, args.hutch)
        )
        manager_proc.daemon = True
        manager_proc.start()
        procs.append(manager_proc)

        if args.export:
            if run_export is None:
                logger.critical("Export module is not available: p4p needs to be installed to use the export feature!")
                return 1
            export_proc = mp.Process(
                name='export',
                target=functools.partial(_sys_exit, run_export),
                args=(args.export, comm_addr, export_addr, args.aggregate)
            )
            export_proc.daemon = True
            export_proc.start()
            procs.append(export_proc)

        if not (args.console or args.headless):
            client_proc = mp.Process(
                name='client',
                target=run_client,
                args=(args.graph_name, comm_addr, info_addr, view_addr, profile_addr, args.load,
                      args.gui_mode, args.prometheus_dir, args.hutch)
            )
            client_proc.daemon = False
            client_proc.start()
            procs.append(client_proc)

        # register a signal handler for cleanup on sigterm
        signal.signal(signal.SIGTERM, functools.partial(_sig_handler, procs))

        if args.console:
            run_console(args.graph_name, comm_addr, args.load)
        elif args.headless:
            if args.load:
                comm_handler = GraphCommHandler(args.graph_name, comm_addr)
                comm_handler.load(args.load)
            while queue.empty():
                pass
        else:
            client_proc.join()
    except KeyboardInterrupt:
        logger.info("Worker killed by user...")
    finally:
        failed_proc = cleanup(procs)
        # cleanup ipc directories
        if owns_ipcdir and ipcdir is not None and os.path.exists(ipcdir):
            shutil.rmtree(ipcdir)
        if xtcdir is not None and os.path.exists(xtcdir):
            shutil.rmtree(xtcdir)
        # return a non-zero status code if any workers died
        if client_proc is not None and client_proc.exitcode != 0:
            return client_proc.exitcode
        elif failed_proc:
            return 1
def run_ami(args, queue=None):
    flags = {}

    if queue is None:
        queue = mp.Queue()

    host = args.host
    graph_addr = "tcp://%s:%d" % (host, args.port + 1)
    collector_addr = "tcp://127.0.0.1:%d" % (args.port + 2)
    globalcol_addr = "tcp://%s:%d" % (host, args.port + 3)
    export_addr = "tcp://%s:%d" % (host, args.port + 5)
    msg_addr = "tcp://%s:%d" % (host, args.port + 6)

    procs = []

    log_handlers = [logging.StreamHandler()]
    if args.log_file is not None:
        log_handlers.append(logging.FileHandler(args.log_file))
    log_level = getattr(logging, args.log_level.upper(), logging.INFO)
    logging.basicConfig(format=LogConfig.FullFormat, level=log_level, handlers=log_handlers)

    try:
        for flag in args.flags:
            try:
                key, value = flag.split('=')
                flags[key] = value
            except ValueError:
                logger.exception("Problem parsing data source flag %s", flag)

        if args.source is not None:
            src_url_match = re.match('(?P<prot>.*)://(?P<body>.*)', args.source)
            if src_url_match:
                src_cfg = src_url_match.groups()
            else:
                logger.critical("Invalid data source config string: %s", args.source)
                return 1
        else:
            src_cfg = None

        comm = MPI.COMM_WORLD
        size = comm.Get_size() - 2
        global_rank = comm.Get_rank()
        local_comm = comm.Split_type(MPI.COMM_TYPE_SHARED, global_rank, MPI.INFO_NULL)
        local_rank_size = local_comm.Get_size()
        local_rank = local_comm.Get_rank()
        node_rank = global_rank // local_rank_size
        # name = MPI.Get_processor_name()
        # print(f"SIZE: {size}, RANK: {global_rank}, LOCAL RANK: {local_rank}, NODE RANK: {node_rank} NAME: {name}")

        if local_rank == 0:
            collector_proc = mp.Process(
                name=f'nodecol-n{node_rank}',
                target=functools.partial(_sys_exit, run_node_collector),
                args=(node_rank, local_rank_size, collector_addr, globalcol_addr, graph_addr,
                      msg_addr, args.prometheus_dir, args.hutch))
            collector_proc.daemon = True
            collector_proc.start()
            procs.append(collector_proc)

        run_worker(global_rank, size, args.heartbeat, src_cfg, collector_addr, graph_addr,
                   msg_addr, export_addr, flags, args.prometheus_dir, args.hutch)

        # register a signal handler for cleanup on sigterm
        signal.signal(signal.SIGTERM, functools.partial(_sig_handler, procs))

        while True:
            pass
    except KeyboardInterrupt:
        logger.info("Worker killed by user...")
    finally:
        failed_proc = cleanup(procs)
        # return a non-zero status code if any workers died
        if failed_proc:
            return 1