def Parameter_Server(Synchronizer, cluster, log_path, model_path, procs): config = tf.ConfigProto( allow_soft_placement=True, log_device_placement=False, ) config.gpu_options.allow_growth = True server = tf.train.Server(cluster, job_name="ps", task_index=0, config=config) sess = tf.Session(target=server.target, config=config) summary_writer = tf.summary.FileWriter(log_path) Net = MiniNetwork(sess=sess, summary_writer=summary_writer, rl_training=FLAGS.training, cluster=cluster, index=0, device=DEVICE[0 % len(DEVICE)], ppo_load_path=FLAGS.restore_model_path, ppo_save_path=model_path, ob_space_add=FLAGS.ob_space_add, act_space_add=FLAGS.act_space_add, freeze_head=FLAGS.freeze_head, use_bn=FLAGS.use_bn, use_sep_net=FLAGS.use_sep_net, restore_model=FLAGS.restore_model, restore_from=FLAGS.restore_from, restore_to=FLAGS.restore_to, load_latest=FLAGS.load_latest, add_image=FLAGS.add_image, partial_restore=FLAGS.partial_restore, weighted_sum_type=FLAGS.weighted_sum_type, initial_type=FLAGS.initial_type) agent = mini_source_agent.MiniSourceAgent(index=-1, net=Net, restore_model=FLAGS.restore_model, rl_training=FLAGS.training, ob_space_add=FLAGS.ob_space_add) print("Parameter server: waiting for cluster connection...") sess.run(tf.report_uninitialized_variables()) print("Parameter server: cluster ready!") print("Parameter server: initializing variables...") agent.init_network() print("Parameter server: variables initialized") update_counter = 0 max_win_rate = 0. latest_win_rate = 0. while update_counter < TRAIN_ITERS: agent.reset_old_network() # wait for update Synchronizer.wait() logging("Update Network!") # TODO count the time , compare cpu and gpu time.sleep(1) # update finish Synchronizer.wait() logging("Update Network finished!") steps, win_rate = agent.update_summary(update_counter) logging("Steps: %d, win rate: %f" % (steps, win_rate)) update_counter += 1 if win_rate >= max_win_rate: agent.save_model() max_win_rate = win_rate latest_win_rate = win_rate agent.net.save_latest_policy() return max_win_rate, latest_win_rate
def Worker(index, update_game_num, Synchronizer, cluster, model_path, log_path): config = tf.ConfigProto( allow_soft_placement=True, log_device_placement=False, ) config.gpu_options.allow_growth = True worker = tf.train.Server(cluster, job_name="worker", task_index=index, config=config) sess = tf.Session(target=worker.target, config=config) summary_writer = tf.summary.FileWriter(log_path) Net = MiniNetwork(sess=sess, summary_writer=summary_writer, rl_training=FLAGS.training, cluster=cluster, index=index, device=DEVICE[index % len(DEVICE)], ppo_load_path=FLAGS.restore_model_path, ppo_save_path=model_path, ob_space_add=FLAGS.ob_space_add, act_space_add=FLAGS.act_space_add, freeze_head=FLAGS.freeze_head, use_bn=FLAGS.use_bn, use_sep_net=FLAGS.use_sep_net, restore_model=FLAGS.restore_model, restore_from=FLAGS.restore_from, restore_to=FLAGS.restore_to, load_latest=FLAGS.load_latest, add_image=FLAGS.add_image, partial_restore=FLAGS.partial_restore, weighted_sum_type=FLAGS.weighted_sum_type, initial_type=FLAGS.initial_type) global_buffer = Buffer() agents = [] for i in range(THREAD_NUM): agent = mini_source_agent.MiniSourceAgent(index=i, global_buffer=global_buffer, net=Net, restore_model=FLAGS.restore_model, rl_training=FLAGS.training, strategy_agent=None, ob_space_add=FLAGS.ob_space_add) agents.append(agent) print("Worker %d: waiting for cluster connection..." % index) sess.run(tf.report_uninitialized_variables()) print("Worker %d: cluster ready!" % index) while len(sess.run(tf.report_uninitialized_variables())): print("Worker %d: waiting for variable initialization..." % index) time.sleep(1) print("Worker %d: variables initialized" % index) game_num = np.ceil(update_game_num // THREAD_NUM) UPDATE_EVENT.clear() ROLLING_EVENT.set() # Run threads threads = [] for i in range(THREAD_NUM - 1): t = threading.Thread(target=run_thread, args=(agents[i], game_num, Synchronizer, FLAGS.difficulty)) threads.append(t) t.daemon = True t.start() time.sleep(3) run_thread(agents[-1], game_num, Synchronizer, FLAGS.difficulty) for t in threads: t.join()
def Parameter_Server(Synchronizer, cluster, log_path, model_path, procs): config = tf.ConfigProto( allow_soft_placement=True, log_device_placement=False, ) config.gpu_options.allow_growth = True server = tf.train.Server(cluster, job_name="ps", task_index=0, config=config) #config.gpu_options.per_process_gpu_memory_fraction = 0.2 sess = tf.Session(target=server.target, config=config) summary_writer = tf.summary.FileWriter(log_path) mini_net = MiniNetwork(sess, index=0, summary_writer=summary_writer, rl_training=True, cluster=cluster, ppo_load_path=FLAGS.restore_model_path, ppo_save_path=model_path, freeze_head=FLAGS.freeze_head, use_bn=FLAGS.use_bn, use_sep_net=FLAGS.use_sep_net, restore_model=FLAGS.restore_model, restore_from=FLAGS.restore_from, restore_to=FLAGS.restore_to) agent = MiniAgent(agent_id=-1, global_buffer=Buffer(), net=mini_net, restore_model=FLAGS.restore_model) print("Parameter server: waiting for cluster connection...") sess.run(tf.report_uninitialized_variables()) print("Parameter server: cluster ready!") print("Parameter server: initializing variables...") agent.init_network() print("Parameter server: variables initialized") last_win_rate = 0. update_counter = 0 while update_counter < TRAIN_ITERS: agent.reset_old_network() # wait for update Synchronizer.wait() logging("Update Network!") # TODO count the time , compare cpu and gpu time.sleep(1) # update finish Synchronizer.wait() logging("Update Network finished!") steps, win_rate = agent.update_summary(update_counter) logging("Steps: %d, win rate: %f" % (steps, win_rate)) update_counter += 1 if win_rate >= last_win_rate: agent.save_model() last_win_rate = win_rate for p in procs: print('Process terminate') p.terminate()
def Worker(index, update_game_num, Synchronizer, cluster, model_path): config = tf.ConfigProto( allow_soft_placement=True, log_device_placement=False, ) config.gpu_options.allow_growth = True worker = tf.train.Server(cluster, job_name="worker", task_index=index, config=config) #config.gpu_options.per_process_gpu_memory_fraction = 0.2 sess = tf.Session(target=worker.target, config=config) mini_net = MiniNetwork(sess, index=index, summary_writer=None, rl_training=True, cluster=cluster, ppo_load_path=FLAGS.restore_model_path, ppo_save_path=model_path, freeze_head=FLAGS.freeze_head, use_bn=FLAGS.use_bn, use_sep_net=FLAGS.use_sep_net, restore_model=FLAGS.restore_model, restore_from=FLAGS.restore_from, restore_to=FLAGS.restore_to) global_buffer = Buffer() agents = [] for i in range(THREAD_NUM): agent = MiniAgent(agent_id=i, global_buffer=global_buffer, net=mini_net, restore_model=FLAGS.restore_model) agents.append(agent) print("Worker %d: waiting for cluster connection..." % index) sess.run(tf.report_uninitialized_variables()) print("Worker %d: cluster ready!" % index) while len(sess.run(tf.report_uninitialized_variables())): print("Worker %d: waiting for variable initialization..." % index) time.sleep(1) print("Worker %d: variables initialized" % index) game_num = np.ceil(update_game_num // THREAD_NUM) UPDATE_EVENT.clear() ROLLING_EVENT.set() difficulty = INITIAL_DIFF # Run threads threads = [] for i in range(THREAD_NUM - 1): t = threading.Thread(target=run_thread, args=(agents[i], game_num, Synchronizer, difficulty)) threads.append(t) t.daemon = True t.start() time.sleep(3) run_thread(agents[-1], game_num, Synchronizer, difficulty) for t in threads: t.join()