Example #1
import logging
from fabric.api import env
# Assumed imports; module paths follow presto-admin's layout and may differ:
from prestoadmin import coordinator as coord, util
from prestoadmin.deploy import configure_presto
from prestoadmin.util import constants

_LOGGER = logging.getLogger(__name__)

def coordinator():
    """
    Deploy the coordinator configuration to the coordinator node
    """
    if env.host in util.get_coordinator_role():
        _LOGGER.info("Setting coordinator configuration for %s", env.host)
        configure_presto(coord.Coordinator().get_conf(),
                         constants.REMOTE_CONF_DIR)
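A minimal sketch of driving this task through Fabric 1.x's programmatic API; the host name below is a placeholder (in presto-admin the coordinator host comes from the topology configuration):

from fabric.api import execute

# Hypothetical host name, for illustration only.
execute(coordinator, hosts=['coordinator.example.com'])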
Example #2
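# Excerpt from an MPI-driven hyper-parameter optimization loop; it starts
# mid-function, so names such as t_b_processes, comm_world, comm_block,
# block_num, num_blocks, model_provider, args, and H5Data are defined
# earlier in the original module.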
        # Collect every rank's process list with an allgather.
        all_t_b_processes = comm_world.allgather(t_b_processes)
        w_processes = set()
        for gb in all_t_b_processes:
            if gb:
                w_processes.update(map(tuple, gb))
        if block_num == 0:
            print("all collect processes", w_processes)
            ## now you have the ranks that needs to be initialized in rings.

    # MPI process 0 coordinates the Bayesian optimization procedure
    if block_num == 0:
        opt_coordinator = coordinator.Coordinator(
            comm_world, num_blocks, model_provider.parameters,
            (args.hyper_opt == 'genetic'), args.population)
        if args.previous_state:
            opt_coordinator.load(args.previous_state)
        if args.target_objective:
            opt_coordinator.target_fom = args.target_objective
        opt_coordinator.label = args.label
        opt_coordinator.run(num_iterations=args.num_iterations)
        opt_coordinator.record_details()
    else:
        print("Process {} on block {}, rank {}, create a process block".format(
            comm_world.Get_rank(), block_num, comm_block.Get_rank()))
        data = H5Data(batch_size=args.batch,
                      cache=args.caching_dir,
                      preloading=args.data_preload,
                      features_name=features_name,
                      labels_name=labels_name)
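The allgather above is the standard MPI collective for merging per-rank lists. A minimal, self-contained mpi4py sketch of the same idea (assumes mpi4py is installed; run under mpiexec):

from mpi4py import MPI

comm_world = MPI.COMM_WORLD
# Each rank contributes its local list; allgather returns one list per rank.
local = [(comm_world.Get_rank(), 0)]
gathered = comm_world.allgather(local)
merged = set()
for part in gathered:
    if part:
        merged.update(map(tuple, part))
if comm_world.Get_rank() == 0:
    print("all collected processes", sorted(merged))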
Example #3
#!/usr/bin/env python
import os

# Project-local modules providing the shared channel, the coordinator,
# and the participants.
import channel
import coordinator
import participant

chan = channel.Channel()
chan.channel.flushall()  # clear any state left over from a previous run

NP = 3  # number of participant processes
coord = coordinator.Coordinator()
parts = [participant.Participant() for _ in range(NP)]

# Fork a child process to run the coordinator.
pid = os.fork()
if pid == 0:
    coord.run()
    os._exit(0)

# Fork one child per participant.
for i in range(NP):
    pid = os.fork()
    if pid == 0:
        parts[i].run()
        os._exit(0)
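The parent returns without reaping its children. Assuming each run() returns once the protocol finishes, a minimal way to wait for the coordinator plus the NP participants:

# Reap the coordinator child and the NP participant children.
for _ in range(NP + 1):
    os.wait()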
Example #4
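# Excerpt from a similar MPI optimization driver (see Example #2). Names
# such as get_block_num, mm, mpi, np (numpy), H5Data, Algo, import_keras,
# args, num_blocks, train_list, features_name, labels_name, and
# model_provider come from the original module's imports and setup.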
    block_num = get_block_num(comm_world, args.block_size)
    device = mm.get_device(comm_world, num_blocks)
    backend = 'tensorflow'
    print("Process {} using device {}".format(comm_world.Get_rank(), device))
    comm_block = comm_world.Split(block_num)

    param_ranges = [
        (0.0, 1.0),  # dropout
        (1, 6),  # kernel_size
        (1., 10.),  # lr exponent
    ]

    # MPI process 0 coordinates the Bayesian optimization procedure
    if block_num == 0:
        def model_fn(x, y, z):
            return mpi.test_cnn(x, y, np.exp(-z))
        opt_coordinator = coordinator.Coordinator(comm_world, num_blocks,
                                                  param_ranges, model_fn)
        opt_coordinator.run(num_iterations=30)
    else:
        data = H5Data(batch_size=args.batch,
                      features_name='Images',
                      labels_name='Labels')
        data.set_file_names(train_list)
        validate_every = data.count_data() // args.batch  # batches per epoch
        algo = Algo(args.optimizer,
                    loss=args.loss,
                    validate_every=validate_every,
                    sync_every=args.sync_every)
        os.environ['KERAS_BACKEND'] = backend
        import_keras()
        import keras.callbacks as cbks
        callbacks = []

    # Collect every rank's process list. allgather is collective over
    # comm_world, so every rank (including block 0) must call it; it
    # belongs at function level, not inside the worker branch above.
    all_t_b_processes = comm_world.allgather(t_b_processes)
    w_processes = set()
    for gb in all_t_b_processes:
        if gb:
            w_processes.update(map(tuple, gb))
    if block_num == 0:
        print("all collected processes", w_processes)
        # These are the ranks that need to be initialized in rings.

    # MPI process 0 coordinates the Bayesian optimization procedure
    if block_num == 0:
        opt_coordinator = coordinator.Coordinator(comm_world, num_blocks,
                                                  model_provider.parameters)
        if args.previous_state:
            opt_coordinator.load(args.previous_state)
        if args.target_objective:
            opt_coordinator.target_fom = args.target_objective
        opt_coordinator.run(num_iterations=args.num_iterations)
    else:
        print("Process {} on block {}, rank {}, create a process block".format(
            comm_world.Get_rank(), block_num, comm_block.Get_rank()))
        data = H5Data(batch_size=args.batch,
                      cache=args.caching_dir,
                      preloading=args.data_preload,
                      features_name=features_name,
                      labels_name=labels_name)
        data.set_file_names(train_list)
        validate_every = data.count_data() // args.batch  # batches per epoch
        print(data.count_data(), "samples to train on")
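A minimal sketch of the comm_world.Split(block_num) mechanism used above (mpi4py; the block size is a placeholder): ranks that pass the same color end up in the same sub-communicator.

from mpi4py import MPI

comm_world = MPI.COMM_WORLD
block_size = 2  # hypothetical block size
block_num = comm_world.Get_rank() // block_size  # color for Split
comm_block = comm_world.Split(block_num)
print("world rank {} -> block {}, block rank {}".format(
    comm_world.Get_rank(), block_num, comm_block.Get_rank()))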