import logging

from fabric.api import env

# `util`, `coord`, `constants`, and `configure_presto` are helpers from the
# surrounding deployment package and are assumed to be importable here.
_LOGGER = logging.getLogger(__name__)


def coordinator():
    """
    Deploy the coordinator configuration to the coordinator node
    """
    if env.host in util.get_coordinator_role():
        _LOGGER.info("Setting coordinator configuration for " + env.host)
        configure_presto(coord.Coordinator().get_conf(),
                         constants.REMOTE_CONF_DIR)

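# Usage sketch (assumption: standard Fabric 1.x invocation; host names are
# illustrative). Fabric binds `env.host` to each target host in turn, so the
# role check above applies the configuration only on the coordinator node:
#
#   fab -H coordinator-host,worker-1,worker-2 coordinator
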
# need to collect all the processes lists
all_t_b_processes = comm_world.allgather(t_b_processes)
w_processes = set()
for gb in all_t_b_processes:
    if gb:
        hgb = map(tuple, gb)
        w_processes.update(hgb)
if block_num == 0:
    print("all collected processes", w_processes)
## now you have the ranks that need to be initialized in rings.

# MPI process 0 coordinates the Bayesian optimization procedure
if block_num == 0:
    opt_coordinator = coordinator.Coordinator(
        comm_world, num_blocks, model_provider.parameters,
        (args.hyper_opt == 'genetic'), args.population)
    if args.previous_state:
        opt_coordinator.load(args.previous_state)
    if args.target_objective:
        opt_coordinator.target_fom = args.target_objective
    opt_coordinator.label = args.label
    opt_coordinator.run(num_iterations=args.num_iterations)
    opt_coordinator.record_details()
else:
    print("Process {} on block {}, rank {}, creates a process block".format(
        comm_world.Get_rank(), block_num, comm_block.Get_rank()))
    data = H5Data(batch_size=args.batch,
                  cache=args.caching_dir,
                  preloading=args.data_preload,
                  features_name=features_name,
                  labels_name=labels_name)

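# Standalone mpi4py sketch (assumed setup; `t_b_processes` is a stand-in for
# each rank's list of (rank, block) pairs) of the gather-and-flatten step
# above: allgather hands every rank the full set of per-rank lists, which is
# then deduplicated into one set of tuples.
from mpi4py import MPI

comm_world = MPI.COMM_WORLD
t_b_processes = [(comm_world.Get_rank(), comm_world.Get_rank() % 2)]
all_t_b_processes = comm_world.allgather(t_b_processes)
w_processes = set()
for gb in all_t_b_processes:
    if gb:
        w_processes.update(map(tuple, gb))
print("rank", comm_world.Get_rank(), "sees", sorted(w_processes))
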
#!/usr/bin/env python
import os

import channel
import coordinator
import participant

chan = channel.Channel()
chan.channel.flushall()

NP = 3
coord = coordinator.Coordinator()
parts = [participant.Participant() for i in range(NP)]

# fork one child process for the coordinator ...
pid = os.fork()
if pid == 0:
    coord.run()
    os._exit(0)

# ... and one child process per participant
for i in range(NP):
    pid = os.fork()
    if pid == 0:
        parts[i].run()
        os._exit(0)
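# The parent above exits without reaping its children. A minimal sketch
# (assumption: the harness should block until the coordinator and all
# participants have terminated) would append:
for _ in range(NP + 1):   # NP participants + 1 coordinator
    os.waitpid(-1, 0)     # wait for any child to exit
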
block_num = get_block_num(comm_world, args.block_size)
device = mm.get_device(comm_world, num_blocks)
backend = 'tensorflow'
print("Process {} using device {}".format(comm_world.Get_rank(), device))
comm_block = comm_world.Split(block_num)
param_ranges = [
    (0.0, 1.0),  # dropout
    (1, 6),      # kernel_size
    (1., 10.),   # lr exponent
]

# MPI process 0 coordinates the Bayesian optimization procedure
if block_num == 0:
    model_fn = lambda x, y, z: mpi.test_cnn(x, y, np.exp(-z))
    opt_coordinator = coordinator.Coordinator(comm_world, num_blocks,
                                              param_ranges, model_fn)
    opt_coordinator.run(num_iterations=30)
else:
    data = H5Data(batch_size=args.batch,
                  features_name='Images', labels_name='Labels')
    data.set_file_names(train_list)
    # integer division: validate_every is a step count
    validate_every = data.count_data() // args.batch
    algo = Algo(args.optimizer, loss=args.loss,
                validate_every=validate_every,
                sync_every=args.sync_every)
    # the backend must be chosen before Keras is imported
    os.environ['KERAS_BACKEND'] = backend
    import_keras()
    import keras.callbacks as cbks
    callbacks = []

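# Standalone mpi4py sketch (assumption: get_block_num puts rank 0 alone in
# coordinator block 0 and groups the remaining ranks into fixed-size worker
# blocks) of how comm_world.Split(color) yields one communicator per block:
from mpi4py import MPI

comm_world = MPI.COMM_WORLD
rank = comm_world.Get_rank()
block_size = 2
block_num = 0 if rank == 0 else 1 + (rank - 1) // block_size
comm_block = comm_world.Split(block_num)
print("world rank", rank, "-> block", block_num,
      "block rank", comm_block.Get_rank())
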
#print ("translate process ranks from ",b_processes,"to",t_b_processes) #need to collect all the processes lists all_t_b_processes = comm_world.allgather(t_b_processes) w_processes = set() for gb in all_t_b_processes: if gb: hgb = map(tuple, gb) w_processes.update(hgb) if block_num == 0: print("all collect processes", w_processes) ## now you have the ranks that needs to be initialized in rings. # MPI process 0 coordinates the Bayesian optimization procedure if block_num == 0: opt_coordinator = coordinator.Coordinator(comm_world, num_blocks, model_provider.parameters) if args.previous_state: opt_coordinator.load(args.previous_state) if args.target_objective: opt_coordinator.target_fom = args.target_objective opt_coordinator.run(num_iterations=args.num_iterations) else: print("Process {} on block {}, rank {}, create a process block".format( comm_world.Get_rank(), block_num, comm_block.Get_rank())) data = H5Data(batch_size=args.batch, cache=args.caching_dir, preloading=args.data_preload, features_name=features_name, labels_name=labels_name) data.set_file_names(train_list) validate_every = data.count_data() / args.batch print(data.count_data(), "samples to train on")