Example #1
0
def run_cmd(args, timer=None):
    """Load a DCOP, distribute its computations and solve it.

    :param args: parsed command-line arguments for the "solve" command
        (infinity, collect_on, period, run_metrics, end_metrics,
        distribution, algo, algo_params, dcop_files, mode).
    :param timer: optional wall-clock timer (unused in this variant).
    """
    logger.debug('dcop command "solve" with arguments {}'.format(args))

    global INFINITY
    INFINITY = args.infinity

    global collect_on
    collect_on = args.collect_on

    # A collection period only makes sense with periodic collection.
    period = None
    if args.collect_on == 'period':
        period = 1 if args.period is None else args.period
    else:
        if args.period is not None:
            _error('Cannot use "period" argument when collect_on is not '
                   '"period"')

    # Callback used by the collection thread to write metrics out.
    csv_cb = prepare_metrics_files(args.run_metrics, args.end_metrics,
                                   collect_on)

    # A known distribution method selects a distribution module; any other
    # value is treated below as a path to a pre-computed distribution file.
    if args.distribution in ['oneagent', 'adhoc', 'ilp_fgdp']:
        dist_module, algo_module, graph_module = _load_modules(
            args.distribution, args.algo)
    else:
        dist_module, algo_module, graph_module = _load_modules(None, args.algo)

    global dcop
    logger.info('loading dcop from {}'.format(args.dcop_files))
    dcop = load_dcop_from_file(args.dcop_files)

    # Build factor-graph computation graph
    logger.info('Building computation graph ')
    cg = graph_module.build_computation_graph(dcop)
    logger.debug('Computation graph: %s ', cg)

    logger.info('Distributing computation graph ')
    if dist_module is not None:
        distribution = dist_module.distribute(
            cg, dcop.agents.values(),
            hints=dcop.dist_hints,
            computation_memory=algo_module.computation_memory,
            communication_load=algo_module.communication_load)
    else:
        distribution = load_dist_from_file(args.distribution)
    logger.debug('Distribution Computation graph: %s ', distribution)

    logger.info('Dcop distribution : {}'.format(distribution))

    algo = build_algo_def(algo_module, args.algo, dcop.objective,
                          args.algo_params)

    # Setup metrics collection in a separate daemon thread.
    collector_queue = Queue()
    collect_t = Thread(target=collect_tread,
                       args=[collector_queue, csv_cb],
                       daemon=True)
    collect_t.start()

    global orchestrator
    if args.mode == 'thread':
        orchestrator = run_local_thread_dcop(algo, cg, distribution, dcop,
                                             INFINITY,
                                             collector=collector_queue,
                                             collect_moment=args.collect_on,
                                             period=period)
    elif args.mode == 'process':

        # Disable logs from agents, they are in other processes anyway
        agt_logs = logging.getLogger('pydcop.agent')
        agt_logs.disabled = True

        # When using the (default) 'fork' start method, http servers on agent's
        # processes do not work (why ?)
        multiprocessing.set_start_method('spawn')
        orchestrator = run_local_process_dcop(algo, cg, distribution, dcop,
                                              INFINITY,
                                              collector=collector_queue,
                                              collect_moment=args.collect_on,
                                              period=period)
    else:
        # Fix: an unknown mode previously left the global 'orchestrator'
        # unset (or stale from a previous run) and crashed below with a
        # NameError; fail fast with an explicit error instead.
        _error('Invalid mode, should be "thread" or "process": '
               '{}'.format(args.mode))

    try:
        orchestrator.deploy_computations()
        orchestrator.run()
    except Exception as e:
        # exc_info=True (was 1) to log the full traceback idiomatically.
        logger.error(e, exc_info=True)
        orchestrator.stop_agents(5)
        orchestrator.stop()
        _results('ERROR')
Example #2
0
def run_cmd(args, timer=None, timeout=None):
    """Load a DCOP, distribute its computations and solve it.

    Results are emitted through ``_results`` and metrics through the csv
    callback returned by ``prepare_metrics_files``.

    :param args: parsed command-line arguments for the "solve" command.
    :param timer: optional wall-clock timer, cancelled once the run returns.
    :param timeout: optional timeout, forwarded to ``orchestrator.run``.
    """
    logger.debug('dcop command "solve" with arguments {}'.format(args))

    global INFINITY, collect_on, output_file
    INFINITY = args.infinity
    output_file = args.output
    collect_on = args.collect_on

    # A collection period only makes sense with periodic collection.
    period = None
    if args.collect_on == "period":
        period = 1 if args.period is None else args.period
    else:
        if args.period is not None:
            _error('Cannot use "period" argument when collect_on is not '
                   '"period"')

    # Callback used by the collection thread to write metrics out.
    csv_cb = prepare_metrics_files(args.run_metrics, args.end_metrics,
                                   collect_on)

    # A known distribution method selects a distribution module; any other
    # value is treated below as a path to a pre-computed distribution file.
    if args.distribution in DISTRIBUTION_METHODS:
        dist_module, algo_module, graph_module = _load_modules(
            args.distribution, args.algo)
    else:
        dist_module, algo_module, graph_module = _load_modules(None, args.algo)

    global dcop
    logger.info("loading dcop from {}".format(args.dcop_files))
    dcop = load_dcop_from_file(args.dcop_files)
    logger.debug(f"dcop  {dcop} ")

    # Build factor-graph computation graph
    logger.info("Building computation graph ")
    cg = graph_module.build_computation_graph(dcop)
    logger.debug("Computation graph: %s ", cg)

    logger.info("Distributing computation graph ")
    if dist_module is not None:

        # Some algorithms define no cost model; default to zero cost so the
        # distribution method can still run.
        if not hasattr(algo_module, "computation_memory"):
            algo_module.computation_memory = lambda *v, **k: 0
        if not hasattr(algo_module, "communication_load"):
            algo_module.communication_load = lambda *v, **k: 0

        distribution = dist_module.distribute(
            cg,
            dcop.agents.values(),
            hints=dcop.dist_hints,
            computation_memory=algo_module.computation_memory,
            communication_load=algo_module.communication_load,
        )
    else:
        # Not a known method: interpret args.distribution as a file path.
        distribution = load_dist_from_file(args.distribution)
    logger.debug("Distribution Computation graph: %s ", distribution)

    logger.info("Dcop distribution : {}".format(distribution))

    algo = build_algo_def(algo_module, args.algo, dcop.objective,
                          args.algo_params)

    # Setup metrics collection
    collector_queue = Queue()
    collect_t = Thread(target=collect_tread,
                       args=[collector_queue, csv_cb],
                       daemon=True)
    collect_t.start()

    global orchestrator
    if args.mode == "thread":
        orchestrator = run_local_thread_dcop(
            algo,
            cg,
            distribution,
            dcop,
            INFINITY,
            collector=collector_queue,
            collect_moment=args.collect_on,
            period=period,
            delay=args.delay,
            uiport=args.uiport,
        )
    elif args.mode == "process":

        # Disable logs from agents, they are in other processes anyway
        agt_logs = logging.getLogger("pydcop.agent")
        agt_logs.disabled = True

        # When using the (default) 'fork' start method, http servers on agent's
        # processes do not work (why ?)
        multiprocessing.set_start_method("spawn")
        orchestrator = run_local_process_dcop(
            algo,
            cg,
            distribution,
            dcop,
            INFINITY,
            collector=collector_queue,
            collect_moment=args.collect_on,
            period=period,
            delay=args.delay,
            uiport=args.uiport,
        )
    try:
        orchestrator.deploy_computations()
        orchestrator.run(timeout=timeout)
        if timer:
            timer.cancel()
        # NOTE(review): timeout_stopped looks like a module-level flag set by
        # an external timeout handler — confirm where it is assigned.
        if not timeout_stopped:
            if orchestrator.status == "TIMEOUT":
                _results("TIMEOUT")
                sys.exit(0)
            elif orchestrator.status != "STOPPED":
                _results("FINISHED")
                sys.exit(0)

        # in case it did not stop, dump remaining threads

    except Exception as e:
        logger.error(e, exc_info=1)
        orchestrator.stop_agents(5)
        orchestrator.stop()
        _results("ERROR")
Example #3
0
def run_cmd(args, timer=None, timeout=None):
    """Run a DCOP on a scenario, with computation replication.

    Loads the dcop and scenario, builds and distributes the computation
    graph, replicates computations (k = args.ktarget) and runs the scenario
    until it finishes, a timeout occurs or the orchestrator is stopped.

    :param args: parsed command-line arguments for the "run" command.
    :param timer: optional wall-clock timer, cancelled once the run returns.
    :param timeout: optional timeout, forwarded to ``orchestrator.run``.
    """
    logger.debug('dcop command "run" with arguments {}'.format(args))

    global INFINITY, collect_on, output_file
    INFINITY = args.infinity
    collect_on = args.collect_on
    output_file = args.output

    # A collection period only makes sense with periodic collection.
    period = None
    if args.collect_on == "period":
        period = 1 if args.period is None else args.period
    else:
        if args.period is not None:
            _error('Cannot use "period" argument when collect_on is not '
                   '"period"')

    # Callback used by the collection thread to write metrics out.
    csv_cb = prepare_metrics_files(args.run_metrics, args.end_metrics,
                                   collect_on)

    global dcop
    logger.info("loading dcop from {}".format(args.dcop_files))
    dcop = load_dcop_from_file(args.dcop_files)

    dcop = filter_dcop(dcop)

    # Fix: removed a redundant '_load_modules(None, args.algo)' call that
    # used to run before filtering; its results were always overwritten here.
    # A known distribution method selects a distribution module; any other
    # value is treated below as a path to a pre-computed distribution file.
    if args.distribution in DISTRIBUTION_METHODS:
        dist_module, algo_module, graph_module = _load_modules(
            args.distribution, args.algo)
    else:
        dist_module, algo_module, graph_module = _load_modules(None, args.algo)

    logger.info("loading scenario from {}".format(args.scenario))
    scenario = load_scenario_from_file(args.scenario)

    logger.info("Building computation graph ")
    cg = graph_module.build_computation_graph(dcop)

    logger.info("Distributing computation graph ")
    if dist_module is not None:
        distribution = dist_module.distribute(
            cg,
            dcop.agents.values(),
            hints=dcop.dist_hints,
            computation_memory=algo_module.computation_memory,
            communication_load=algo_module.communication_load,
        )
    else:
        distribution = load_dist_from_file(args.distribution)
    logger.debug("Distribution Computation graph: %s ", distribution)

    algo = build_algo_def(algo_module, args.algo, dcop.objective,
                          args.algo_params)

    # Setup metrics collection in a separate daemon thread.
    collector_queue = Queue()
    collect_t = Thread(target=collect_tread,
                       args=[collector_queue, csv_cb],
                       daemon=True)
    collect_t.start()

    global orchestrator
    if args.mode == "thread":
        orchestrator = run_local_thread_dcop(
            algo,
            cg,
            distribution,
            dcop,
            INFINITY,
            collector=collector_queue,
            collect_moment=args.collect_on,
            period=period,
            replication=args.replication_method,
        )
    elif args.mode == "process":

        # Disable logs from agents, they are in other processes anyway
        agt_logs = logging.getLogger("pydcop.agent")
        agt_logs.disabled = True

        # When using the (default) 'fork' start method, http servers on agent's
        # processes do not work (why ?)
        multiprocessing.set_start_method("spawn")
        # NOTE(review): unlike 'thread' mode, no replication argument is
        # passed here — confirm whether this is intentional.
        orchestrator = run_local_process_dcop(
            algo,
            cg,
            distribution,
            dcop,
            INFINITY,
            collector=collector_queue,
            collect_moment=args.collect_on,
            period=period,
        )

    orchestrator.set_error_handler(_orchestrator_error)

    try:
        orchestrator.deploy_computations()
        orchestrator.start_replication(args.ktarget)
        if orchestrator.wait_ready():
            orchestrator.run(scenario, timeout=timeout)
            if timer:
                timer.cancel()
            # NOTE(review): timeout_stopped looks like a module-level flag
            # set by an external timeout handler — confirm where it is set.
            if not timeout_stopped:
                if orchestrator.status == "TIMEOUT":
                    _results("TIMEOUT")
                    sys.exit(0)
                elif orchestrator.status != "STOPPED":
                    _results("FINISHED")
                    sys.exit(0)

    except Exception as e:
        # exc_info=True (was 1) to log the full traceback idiomatically.
        logger.error(e, exc_info=True)
        print(e)
        # Dump the stack of every live thread to help diagnose the failure.
        for th in threading.enumerate():
            print(th)
            traceback.print_stack(sys._current_frames()[th.ident])
            print()
        orchestrator.stop_agents(5)
        orchestrator.stop()
        _results("ERROR")
Example #4
0
def run_cmd(args, timer):
    """Run a DCOP on a scenario, using a pre-computed distribution file.

    Unlike other variants, the distribution is always loaded from the file
    given in ``args.distribution`` instead of being computed here.

    :param args: parsed command-line arguments for the "run" command.
    :param timer: wall-clock timer (not used directly in this variant).
    """
    logger.debug('dcop command "run" with arguments {}'.format(args))

    global INFINITY
    INFINITY = args.infinity

    global collect_on
    collect_on = args.collect_on
    # A collection period only makes sense with periodic collection.
    period = None
    if args.collect_on == 'period':
        period = 1 if args.period is None else args.period
    else:
        if args.period is not None:
            _error('Cannot use "period" argument when collect_on is not '
                   '"period"')

    # Callback used by the collection thread to write metrics out.
    csv_cb = prepare_metrics_files(args.run_metrics, args.end_metrics,
                                   collect_on)

    # No distribution module needed: only the algorithm and graph modules.
    _, algo_module, graph_module = _load_modules(None, args.algo)

    global dcop
    logger.info('loading dcop from {}'.format(args.dcop_files))
    dcop = load_dcop_from_file(args.dcop_files)

    logger.info('Loading distribution from {}'.format(args.distribution))
    distribution = load_dist_from_file(args.distribution)

    # FIXME: load replica dist from file and pass to orchestrator
    # logger.info('Loading replica distribution from {}'.format(
    #     args.distribution))
    # replica_dist = load_replica_dist_from_file(args.replica_dist)
    # logger.info('Dcop distribution : %s', replica_dist)

    logger.info('loading scenario from {}'.format(args.scenario))
    scenario = load_scenario_from_file(args.scenario)

    logger.info('Building computation graph ')
    cg = graph_module.build_computation_graph(dcop)

    algo = build_algo_def(algo_module, args.algo, dcop.objective,
                          args.algo_params)

    # Setup metrics collection
    collector_queue = Queue()
    collect_t = Thread(target=collect_tread,
                       args=[collector_queue, csv_cb],
                       daemon=True)
    collect_t.start()

    global orchestrator
    if args.mode == 'thread':
        orchestrator = run_local_thread_dcop(
            algo,
            cg,
            distribution,
            dcop,
            INFINITY,
            collector=collector_queue,
            collect_moment=args.collect_on,
            period=period,
            replication=args.replication_method)
    elif args.mode == 'process':

        # Disable logs from agents, they are in other processes anyway
        agt_logs = logging.getLogger('pydcop.agent')
        agt_logs.disabled = True

        # When using the (default) 'fork' start method, http servers on agent's
        # processes do not work (why ?)
        multiprocessing.set_start_method('spawn')
        # NOTE(review): unlike 'thread' mode, no replication argument is
        # passed here — confirm whether this is intentional.
        orchestrator = run_local_process_dcop(algo,
                                              cg,
                                              distribution,
                                              dcop,
                                              INFINITY,
                                              collector=collector_queue,
                                              collect_moment=args.collect_on,
                                              period=period)

    orchestrator.set_error_handler(_orchestrator_error)

    try:
        orchestrator.deploy_computations()
        # Start replication of computations (k = args.ktarget), then run the
        # scenario only once all agents report ready.
        orchestrator.start_replication(args.ktarget)
        if orchestrator.wait_ready():
            orchestrator.run(scenario)
        # orchestrator.run(scenario) # FIXME
    except Exception as e:
        logger.error(e, exc_info=1)
        print(e)
        # Dump the stack of every live thread to help diagnose the failure.
        for th in threading.enumerate():
            print(th)
            traceback.print_stack(sys._current_frames()[th.ident])
            print()
        orchestrator.stop_agents(5)
        orchestrator.stop()
        _results('ERROR', e)
def run_cmd(args, timer: Timer):
    """Compute a replica distribution for a DCOP and output it as yaml.

    Deploys the computations, runs the replication protocol (k replicas per
    computation, k = args.ktarget) and dumps the resulting replica
    distribution, either to ``args.output`` or to stdout.

    :param args: parsed command-line arguments.
    :param timer: wall-clock timer, cancelled once replication is done.
    """
    logger.debug('Distribution replicas : %s', args)

    # global dcop
    logger.info('loading dcop from {}'.format(args.dcop_files))
    dcop = load_dcop_from_file(args.dcop_files)

    try:
        algo_module = import_module('pydcop.algorithms.{}'.format(args.algo))
        algo = build_algo_def(algo_module, args.algo, dcop.objective,
                              [])  # FIXME : algo params needed?

        graph_module = import_module('pydcop.computations_graph.{}'.format(
            algo_module.GRAPH_TYPE))
        logger.info('Building computation graph ')
        cg = graph_module.build_computation_graph(dcop)
        logger.info('Computation graph : %s', cg)

    except ImportError:
        _error('Could not find module for algorithm {} or graph model '
               'for this algorithm'.format(args.algo))

    logger.info('loading distribution from {}'.format(args.distribution))
    distribution = load_dist_from_file(args.distribution)

    INFINITY = 10000  # FIXME should not be mandatory

    # Fix: 'global orchestrator' was declared twice in this function;
    # a single declaration is enough.
    global orchestrator
    if args.mode == 'thread':
        orchestrator = run_local_thread_dcop(algo,
                                             cg,
                                             distribution,
                                             dcop,
                                             INFINITY,
                                             replication=args.replication)
    elif args.mode == 'process':

        # Disable logs from agents, they are in other processes anyway
        agt_logs = logging.getLogger('pydcop.agent')
        agt_logs.disabled = True

        # When using the (default) 'fork' start method, http servers on agent's
        # processes do not work (why ?)
        multiprocessing.set_start_method('spawn')
        orchestrator = run_local_process_dcop(algo,
                                              cg,
                                              distribution,
                                              dcop,
                                              INFINITY,
                                              replication=args.replication)

    try:
        orchestrator.deploy_computations()
        orchestrator.start_replication(args.ktarget)
        orchestrator.wait_ready()
        orchestrator.stop_agents(5)
        orchestrator.stop()
        timer.cancel()
        rep_dist = {
            c: list(hosts)
            for c, hosts in orchestrator.mgt.replica_hosts.items()
        }
        result = {
            'inputs': {
                'dcop': args.dcop_files,
                'algo': args.algo,
                'replication': args.replication,
                'k': args.ktarget
            },
            'replica_dist': rep_dist
        }
        result['inputs']['distribution'] = args.distribution
        # Fix: only print to stdout when no output file was requested,
        # instead of always dumping the result a second time.
        if args.output is not None:
            with open(args.output, encoding='utf-8', mode='w') as fo:
                fo.write(yaml.dump(result))
        else:
            print(yaml.dump(result))
        sys.exit(0)

        # TODO : retrieve and display replica distribution
        # Each agent should send back to the orchestrator the agents hosting
        # the replicas for each of it's computations
    except Exception as e:
        orchestrator.stop_agents(5)
        orchestrator.stop()
        _error('ERROR', e)
def run_cmd(args, timer: Timer = None, timeout=None):
    """Compute a replica distribution for a DCOP and report its metrics.

    Runs the replication protocol (k = args.ktarget) on the given
    dcop/distribution, then dumps the replica distribution along with
    replication metrics (duration, external message count and size) as yaml
    to ``args.output`` or to stdout.

    :param args: parsed command-line arguments.
    :param timer: optional wall-clock timer, cancelled after replication.
    :param timeout: accepted for interface consistency (not used here).
    """
    logger.debug("Distribution replicas : %s", args)
    global orchestrator

    logger.info("loading dcop from {}".format(args.dcop_files))
    dcop = load_dcop_from_file(args.dcop_files)

    try:
        algo_module = load_algorithm_module(args.algo)
        # FIXME : algo params needed?
        algo = build_algo_def(algo_module, args.algo, dcop.objective, [])

        graph_module = import_module("pydcop.computations_graph.{}".format(
            algo_module.GRAPH_TYPE))
        logger.info("Building computation graph ")
        cg = graph_module.build_computation_graph(dcop)
        logger.info("Computation graph : %s", cg)
    except ImportError:
        _error("Could not find module for algorithm {} or graph model "
               "for this algorithm".format(args.algo))

    logger.info("loading distribution from {}".format(args.distribution))
    distribution = load_dist_from_file(args.distribution)

    INFINITY = 10000  # FIXME should not be mandatory

    if args.mode == "thread":
        orchestrator = run_local_thread_dcop(
            algo, cg, distribution, dcop, INFINITY,
            replication=args.replication)
    elif args.mode == "process":
        # Agents run in other processes: their local logs are useless here.
        logging.getLogger("pydcop.agent").disabled = True

        # When using the (default) 'fork' start method, http servers on
        # agents' processes do not work, so force 'spawn'.
        multiprocessing.set_start_method("spawn")
        orchestrator = run_local_process_dcop(
            algo, cg, distribution, dcop, INFINITY,
            replication=args.replication)

    try:
        orchestrator.deploy_computations()
        t_start = time.time()
        orchestrator.start_replication(args.ktarget)
        orchestrator.wait_ready()

        # Aggregate external message counts and sizes over all agents.
        agent_metrics = orchestrator.replication_metrics()
        msg_count = sum(m["count_ext_msg"] for m in agent_metrics.values())
        msg_size = sum(m["size_ext_msg"] for m in agent_metrics.values())

        elapsed = time.time() - t_start
        if timer:
            timer.cancel()
        rep_dist = {comp: list(hosts)
                    for comp, hosts in
                    orchestrator.mgt.replica_hosts.items()}
        orchestrator.stop_agents(5)
        orchestrator.stop()

        inputs = {
            "dcop": args.dcop_files,
            "algo": args.algo,
            "replication": args.replication,
            "k": args.ktarget,
        }
        inputs["distribution"] = args.distribution
        result = {
            "inputs": inputs,
            "metrics": {
                "duration": elapsed,
                "msg_size": msg_size,
                "msg_count": msg_count,
            },
            "replica_dist": rep_dist,
        }
        dumped = yaml.dump(result)
        if args.output is not None:
            with open(args.output, encoding="utf-8", mode="w") as out_f:
                out_f.write(dumped)
        else:
            print(dumped)
        sys.exit(0)

        # TODO : retrieve and display replica distribution
        # Each agent should send back to the orchestrator the agents hosting
        # the replicas for each of it's computations
    except Exception as e:
        orchestrator.stop_agents(5)
        orchestrator.stop()
        _error("ERROR", e)