Ejemplo n.º 1
0
def run_client(graphmgr_addr, load):
    """Start the main client window and spawn widget processes on demand.

    An optional saved graph configuration is read from ``load`` with
    ``dill`` and handed to the main-window process.  The function then
    services a multiprocessing queue: every message is a
    ``(window_type, name, topic)`` triple, and each one that is not an
    ``'exit'`` message starts a new widget process.

    Args:
        graphmgr_addr: address passed through unchanged to the main window
            and to every widget process.
        load: path of a saved graph configuration file, or ``None`` to start
            without one.

    Returns:
        ``1`` when the configuration file cannot be opened or parsed;
        otherwise ``None`` once an ``'exit'`` message has been received.
    """
    restored_cfg = None
    if load is not None:
        # NOTE(review): dill deserialisation presumably carries the same
        # arbitrary-code risk as pickle - only load trusted files.
        try:
            with open(load, 'rb') as cfg_file:
                restored_cfg = dill.load(cfg_file)
        except OSError:
            logger.exception(
                "ami-client: problem opening saved graph configuration file:")
            return 1
        except dill.UnpicklingError:
            logger.exception(
                "ami-client: problem parsing saved graph configuration file (%s):",
                load)
            return 1

    requests = mp.Queue()
    main_window = mp.Process(
        target=run_main_window,
        args=(requests, graphmgr_addr, restored_cfg),
    )
    main_window.start()

    # Handles of every widget process started so far.
    spawned_widgets = []

    while True:
        # Unpack before inspecting the type, so a malformed message fails
        # loudly here rather than being silently skipped.
        window_type, name, topic = requests.get()
        if window_type != 'exit':
            logger.debug("opening new widget: %s %s %s", window_type, name, topic)
            widget = mp.Process(
                target=run_widget,
                args=(requests, window_type, name, topic, graphmgr_addr),
            )
            widget.start()
            spawned_widgets.append(widget)
            continue

        logger.info("received exit signal - exiting!")
        break
Ejemplo n.º 2
0
def start_ami(request, workerjson):
    """Run a headless AMI instance and yield a comm handler connected to it.

    Written as a generator: it starts ``run_ami`` in a child process,
    yields a ``GraphCommHandler`` (or ``None`` if the handler could not be
    set up), and shuts the child process down when resumed.

    Args:
        request: object whose ``param`` attribute supplies the protocol part
            of the data source string.
        workerjson: body part of the data source string.

    Yields:
        The entered ``GraphCommHandler``, or ``None`` when setting it up
        raised an exception.

    Returns:
        ``0`` if the AMI process exited with status 0 or was stopped by
        SIGTERM, ``1`` otherwise (carried on ``StopIteration``).
    """
    try:
        from pytest_cov.embed import cleanup_on_sigterm
        cleanup_on_sigterm()
    except ImportError:
        pass

    parser = build_parser()
    args = parser.parse_args([
        "-n", "1", '--headless', '--tcp',
        '%s://%s' % (request.param, workerjson)
    ])

    queue = mp.Queue()
    ami = mp.Process(name='ami', target=run_ami, args=(args, queue))
    ami.start()

    # A generator used as a fixture must yield exactly once.  Track whether
    # the handler was already handed out so that an error raised later
    # (e.g. while leaving the ``with`` block) does not trigger a second yield.
    handler_yielded = False
    clean_exit = False
    try:
        host = "127.0.0.1"
        comm_addr = "tcp://%s:%d" % (host, BasePort + Ports.Comm)
        with GraphCommHandler(args.graph_name, comm_addr) as comm_handler:
            handler_yielded = True
            yield comm_handler
    except Exception as e:
        # let the fixture exit 'gracefully' if it fails
        print(e)
        if not handler_yielded:
            yield None
    finally:
        # Putting an item on the queue asks the headless instance to stop.
        queue.put(None)
        ami.join(1)
        # if ami still hasn't exited then kill it
        if ami.is_alive():
            ami.terminate()
            ami.join(1)

        # exitcode is None when the process is still alive even after
        # terminate(); treat that as a failure and format it with %s so
        # reporting it cannot raise TypeError.
        clean_exit = ami.exitcode in (0, -signal.SIGTERM)
        if not clean_exit:
            print('AMI exited with non-zero status code: %s' % ami.exitcode)

    # Returned outside ``finally`` so an in-flight exception is never
    # silently discarded by the return statement.
    return 0 if clean_exit else 1
Ejemplo n.º 3
0
def run_ami(args, queue=None):
    """Launch a single-node AMI instance and block until it should stop.

    Starts the worker, node-collector, global-collector and manager
    processes, plus an export process when ``args.export`` is set and a GUI
    client process unless ``args.console`` or ``args.headless`` is set.
    Afterwards it either runs the console, waits for an item on ``queue``
    (headless mode), or waits for the GUI client to exit.  All started
    processes are passed to ``cleanup`` on the way out.

    Args:
        args: parsed command-line namespace.  Attributes read here: ``ipc``,
            ``ipc_dir``, ``port``, ``headless``, ``console``, ``log_file``,
            ``log_level``, ``flags``, ``source``, ``num_workers``,
            ``heartbeat``, ``prometheus_dir``, ``hutch``, ``export``,
            ``aggregate``, ``graph_name``, ``load`` and ``gui_mode``.
        queue: queue used as the stop signal in headless mode; the first
            item placed on it ends the wait.  A new ``mp.Queue`` is created
            when omitted.

    Returns:
        ``1`` for an invalid data source string, for a missing export
        module, or when ``cleanup`` reports a failed process; the GUI
        client's exit code when that is not 0; otherwise ``None``.
    """
    ipcdir = None
    # Only a temporary directory created here is removed on exit; a
    # user-supplied ipc directory is left in place.
    owns_ipcdir = False
    flags = {}
    if queue is None:
        queue = mp.Queue()
    if args.ipc:
        ipcdir = tempfile.mkdtemp()
        owns_ipcdir = True
    elif args.ipc_dir is not None:
        ipcdir = args.ipc_dir

    # Every endpoint has a TCP form (base port + offset) and an IPC form
    # (socket file inside ipcdir); choose the scheme once.
    if ipcdir is None:
        def endpoint(port_offset, _ipc_name):
            return "tcp://%s:%d" % ("127.0.0.1", args.port + port_offset)
    else:
        def endpoint(_port_offset, ipc_name):
            return "ipc://%s/%s" % (ipcdir, ipc_name)

    comm_addr = endpoint(0, "comm")
    graph_addr = endpoint(1, "graph")
    collector_addr = endpoint(2, "node_collector")
    globalcol_addr = endpoint(3, "collector")
    results_addr = endpoint(4, "results")
    export_addr = endpoint(5, "export")
    msg_addr = endpoint(6, "message")
    info_addr = endpoint(7, "info")
    view_addr = endpoint(8, "view")
    profile_addr = endpoint(9, "profile")

    procs = []
    client_proc = None

    def spawn(name, target, proc_args, daemon=True):
        """Create, start and register one child process."""
        proc = mp.Process(name=name, target=target, args=proc_args)
        proc.daemon = daemon
        proc.start()
        procs.append(proc)
        return proc

    log_handlers = [logging.StreamHandler()]
    if args.headless or args.console:
        console_fmt = logging.Formatter(LogConfig.BasicFormat)
        log_handlers[0].setFormatter(console_fmt)
    if args.log_file is not None:
        log_handlers.append(logging.FileHandler(args.log_file))
    log_level = getattr(logging, args.log_level.upper(), logging.INFO)
    logging.basicConfig(format=LogConfig.FullFormat, level=log_level, handlers=log_handlers)

    try:
        for flag in args.flags:
            try:
                # Split on the first '=' only so values may contain '='.
                key, value = flag.split('=', 1)
                flags[key] = value
            except ValueError:
                logger.exception("Problem parsing data source flag %s", flag)

        if args.source is not None:
            src_url_match = re.match(r'(?P<prot>.*)://(?P<body>.*)', args.source)
            if src_url_match:
                src_cfg = src_url_match.groups()
            else:
                logger.critical("Invalid data source config string: %s", args.source)
                return 1
        else:
            src_cfg = None

        for i in range(args.num_workers):
            spawn('worker%03d-n0' % i,
                  functools.partial(_sys_exit, run_worker),
                  (i, args.num_workers, args.heartbeat, src_cfg,
                   collector_addr, graph_addr, msg_addr, export_addr, flags,
                   args.prometheus_dir, args.hutch))

        spawn('nodecol-n0',
              functools.partial(_sys_exit, run_node_collector),
              (0, args.num_workers, collector_addr, globalcol_addr, graph_addr, msg_addr,
               args.prometheus_dir, args.hutch))

        spawn('globalcol',
              functools.partial(_sys_exit, run_global_collector),
              (0, 1, globalcol_addr, results_addr, graph_addr, msg_addr,
               args.prometheus_dir, args.hutch))

        spawn('manager',
              functools.partial(_sys_exit, run_manager),
              (args.num_workers, 1, results_addr, graph_addr, comm_addr, msg_addr, info_addr,
               export_addr, view_addr, profile_addr, args.prometheus_dir, args.hutch))

        if args.export:
            if run_export is None:
                logger.critical("Export module is not available: p4p needs to be installed to use the export feature!")
                return 1
            spawn('export',
                  functools.partial(_sys_exit, run_export),
                  (args.export, comm_addr, export_addr, args.aggregate))

        if not (args.console or args.headless):
            # The GUI client is the only non-daemonic child.
            client_proc = spawn(
                'client',
                run_client,
                (args.graph_name, comm_addr, info_addr, view_addr, profile_addr, args.load,
                 args.gui_mode, args.prometheus_dir, args.hutch),
                daemon=False)

        # register a signal handler for cleanup on sigterm
        signal.signal(signal.SIGTERM, functools.partial(_sig_handler, procs))

        if args.console:
            run_console(args.graph_name, comm_addr, args.load)
        elif args.headless:
            if args.load:
                comm_handler = GraphCommHandler(args.graph_name, comm_addr)
                comm_handler.load(args.load)
            # Block until something is placed on the queue.  A blocking
            # get() replaces polling queue.empty() in a tight loop, which
            # kept one CPU core fully busy for the whole run.
            queue.get()
        else:
            client_proc.join()

    except KeyboardInterrupt:
        logger.info("Worker killed by user...")
    finally:
        failed_proc = cleanup(procs)
        # cleanup ipc directories
        if owns_ipcdir and os.path.exists(ipcdir):
            shutil.rmtree(ipcdir)

    # The status is returned here rather than inside ``finally`` so that an
    # unexpected exception from the body is not swallowed by the return.
    # return a non-zero status code if any workers died
    if client_proc is not None and client_proc.exitcode != 0:
        return client_proc.exitcode
    if failed_proc:
        return 1
    return None
Ejemplo n.º 4
0
def run_ami(args, queue=None):
    """Run one MPI rank of AMI: a worker, plus a node collector on local rank 0.

    Each rank calls ``run_worker``.  The rank whose local rank (within the
    communicator returned by ``Split_type(MPI.COMM_TYPE_SHARED, ...)``) is 0
    also starts a node-collector process first.  When ``run_worker``
    returns, the function idles until it is interrupted.

    Args:
        args: parsed command-line namespace.  Attributes read here: ``host``,
            ``port``, ``log_file``, ``log_level``, ``flags``, ``source``,
            ``heartbeat``, ``prometheus_dir`` and ``hutch``.
        queue: accepted for signature compatibility; a new ``mp.Queue`` is
            created when omitted, but it is not otherwise used here.

    Returns:
        ``1`` for an invalid data source string or when ``cleanup`` reports
        a failed process; otherwise ``None``.
    """
    # Local import: only needed for the idle wait at the end.
    import time

    flags = {}
    if queue is None:
        queue = mp.Queue()

    host = args.host
    graph_addr = "tcp://%s:%d" % (host, args.port + 1)
    # The node collector is always addressed on the loopback interface,
    # unlike the other endpoints, which use args.host.
    collector_addr = "tcp://127.0.0.1:%d" % (args.port + 2)
    globalcol_addr = "tcp://%s:%d" % (host, args.port + 3)
    export_addr = "tcp://%s:%d" % (host, args.port + 5)
    msg_addr = "tcp://%s:%d" % (host, args.port + 6)

    procs = []

    log_handlers = [logging.StreamHandler()]
    if args.log_file is not None:
        log_handlers.append(logging.FileHandler(args.log_file))
    log_level = getattr(logging, args.log_level.upper(), logging.INFO)
    logging.basicConfig(format=LogConfig.FullFormat,
                        level=log_level,
                        handlers=log_handlers)

    try:
        for flag in args.flags:
            try:
                # Split on the first '=' only so values may contain '='.
                key, value = flag.split('=', 1)
                flags[key] = value
            except ValueError:
                logger.exception("Problem parsing data source flag %s", flag)

        if args.source is not None:
            src_url_match = re.match(r'(?P<prot>.*)://(?P<body>.*)',
                                     args.source)
            if src_url_match:
                src_cfg = src_url_match.groups()
            else:
                logger.critical("Invalid data source config string: %s",
                                args.source)
                return 1
        else:
            src_cfg = None

        comm = MPI.COMM_WORLD
        # NOTE(review): two ranks are excluded from the worker count -
        # presumably reserved for other services; confirm against the
        # launcher.
        size = comm.Get_size() - 2
        global_rank = comm.Get_rank()

        local_comm = comm.Split_type(MPI.COMM_TYPE_SHARED, global_rank,
                                     MPI.INFO_NULL)
        local_rank_size = local_comm.Get_size()
        local_rank = local_comm.Get_rank()
        # NOTE(review): assumes ranks are laid out contiguously with the
        # same number of ranks on every node - confirm.
        node_rank = global_rank // local_rank_size

        if local_rank == 0:
            collector_proc = mp.Process(
                name=f'nodecol-n{node_rank}',
                target=functools.partial(_sys_exit, run_node_collector),
                args=(node_rank, local_rank_size, collector_addr,
                      globalcol_addr, graph_addr, msg_addr,
                      args.prometheus_dir, args.hutch))
            collector_proc.daemon = True
            collector_proc.start()
            procs.append(collector_proc)

        # Register the SIGTERM cleanup handler before entering the worker,
        # so a SIGTERM received while the worker is running is handled too.
        # Previously the handler was installed only after run_worker()
        # returned.
        signal.signal(signal.SIGTERM, functools.partial(_sig_handler, procs))

        run_worker(global_rank, size, args.heartbeat, src_cfg, collector_addr,
                   graph_addr, msg_addr, export_addr, flags,
                   args.prometheus_dir, args.hutch)

        # Idle until interrupted.  Sleeping replaces an empty busy loop
        # that kept a CPU core fully loaded.
        while True:
            time.sleep(1)

    except KeyboardInterrupt:
        logger.info("Worker killed by user...")
    finally:
        failed_proc = cleanup(procs)

    # The status is returned here rather than inside ``finally`` so that an
    # unexpected exception from the body is not swallowed by the return.
    # return a non-zero status code if any workers died
    if failed_proc:
        return 1
    return None