import concurrent.futures
import multiprocessing
import os
import sys
import threading

import numpy as np
import tensorflow as tf

# Project-specific names (FLAGS, ACNetwork, Agent, RandomAgent, TwoArms,
# ElevenArms, recreate_directory_structure, recreate_subdirectory_structure,
# thread_processing) are assumed to be importable from elsewhere in this repo.


def hypertune(game):
    """Random-search over learning rate and discount factor for `game`."""
    recreate_directory_structure()
    if not FLAGS.resume and FLAGS.train:
        for i in range(FLAGS.nb_hyperparam_runs):
            # Sample the learning rate log-uniformly in [1e-4, 1e-2].
            lr = 10 ** np.random.uniform(-4, -2)
            gamma = np.random.uniform(0.7, 1.0)
            model_name = "d_{}__lr_{}__gamma_{}".format(game, lr, gamma)
            checkpoint_dir = os.path.join(FLAGS.checkpoint_dir, model_name)
            summaries_dir = os.path.join(FLAGS.summaries_dir, model_name)
            frames_dir = os.path.join(FLAGS.frames_dir, model_name)
            settings = {
                "lr": lr,
                "gamma": gamma,
                "game": game,
                "model_name": model_name,
                "checkpoint_dir": checkpoint_dir,
                "summaries_dir": summaries_dir,
                "frames_dir": frames_dir,
            }
            run(settings)
    else:
        # Recover each trained instance's hyperparameters from its directory
        # name, which follows the "d_<game>__lr_<lr>__gamma_<gamma>" convention.
        model_instances = os.listdir(FLAGS.checkpoint_dir)
        lrs = [inst.split("__")[1].split("_")[1] for inst in model_instances]
        gammas = [inst.split("__")[2].split("_")[1] for inst in model_instances]
        game = model_instances[0].split("__")[0].split("_")[1]
        val_envs = TwoArms.get_envs(game, FLAGS.nb_test_episodes)
        for i in range(len(model_instances)):
            lr = lrs[i]
            gamma = gammas[i]
            model_name = "d_{}__lr_{}__gamma_{}".format(game, lr, gamma)
            print(model_name)
            checkpoint_dir = os.path.join(FLAGS.checkpoint_dir, model_name)
            summaries_dir = os.path.join(FLAGS.summaries_dir, model_name)
            frames_dir = os.path.join(FLAGS.frames_dir, model_name)
            settings = {
                "lr": lr,
                "gamma": gamma,
                "game": game,
                "model_name": model_name,
                "checkpoint_dir": checkpoint_dir,
                "summaries_dir": summaries_dir,
                "frames_dir": frames_dir,
                "envs": val_envs,
            }
            run(settings)

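# A minimal sketch of the log-uniform draw used in hypertune(), assuming only
# numpy; `sample_log_uniform` is an illustrative helper, not part of this repo.
def sample_log_uniform(low, high):
    """Draw a value whose log10 is uniform on [log10(low), log10(high)]."""
    return 10 ** np.random.uniform(np.log10(low), np.log10(high))
# e.g. sample_log_uniform(1e-4, 1e-2) reproduces the learning-rate draw above.
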
def run_baseline():
    """Evaluate a random-action baseline agent on a fixed set of test bandits."""
    test_envs = TwoArms.get_envs(FLAGS.game, FLAGS.nb_test_episodes)
    model_name = "baseline"
    settings = {
        "model_name": model_name,
        "game": FLAGS.game,
        "envs": test_envs,
        "exp_type": "evaluate_baseline",
    }
    tf.reset_default_graph()
    with tf.device("/cpu:0"):
        num_agents = 1
        agents = []
        envs = []
        for i in range(num_agents):
            if settings["game"] == '11arms':
                this_env = ElevenArms()
            else:
                this_env = TwoArms(settings["game"])
            envs.append(this_env)
        for i in range(num_agents):
            agents.append(RandomAgent(envs[i], i, settings))

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        agent_threads = []
        for agent in agents:
            # Pass `agent` explicitly; a bare `lambda: agent.play(coord)`
            # would late-bind the loop variable.
            thread = threading.Thread(target=agent.play, args=(coord,))
            thread.start()
            agent_threads.append(thread)
        coord.join(agent_threads)

def validate_hypertune():
    # recreate_directory_structure()
    model_instances = os.listdir(FLAGS.checkpoint_dir)
    lrs = [inst.split("__")[1].split("_")[1] for inst in model_instances]
    gammas = [inst.split("__")[2].split("_")[1] for inst in model_instances]
    game = model_instances[0].split("__")[0].split("_")[1]
    val_envs = TwoArms.get_envs(game, FLAGS.nb_test_episodes)
    with concurrent.futures.ThreadPoolExecutor(
            max_workers=multiprocessing.cpu_count()) as executor:
        for i in range(len(model_instances)):
            _ = executor.submit(thread_processing, lrs[i], gammas[i],
                                val_envs, game)

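# Note: the futures submitted in validate_hypertune are discarded, so any
# exception raised inside thread_processing is silently swallowed. A hedged
# alternative sketch that surfaces worker errors with the same executor API:
#
#     futures = [executor.submit(thread_processing, lrs[i], gammas[i], val_envs, game)
#                for i in range(len(model_instances))]
#     for future in concurrent.futures.as_completed(futures):
#         future.result()  # re-raises any exception from the worker
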
def evaluate_one_test():
    """Evaluate the best model five times and report its average regret."""
    for _ in range(5):
        model_name = "best_{}__lr_{}__gamma_{}".format(FLAGS.game, FLAGS.lr,
                                                       FLAGS.gamma)
        load_from_model_name = "d_{}__lr_{}__gamma_{}".format(
            FLAGS.best_model_game, FLAGS.lr, FLAGS.gamma)
        print(load_from_model_name)
        checkpoint_dir = os.path.join(FLAGS.checkpoint_dir, model_name)
        summaries_dir = os.path.join(FLAGS.summaries_dir, model_name)
        frames_dir = os.path.join(FLAGS.frames_dir, model_name)
        test_envs = TwoArms.get_envs(FLAGS.game, FLAGS.nb_test_episodes)
        settings = {
            "lr": FLAGS.lr,
            "gamma": FLAGS.gamma,
            "game": FLAGS.game,
            "model_name": model_name,
            "checkpoint_dir": checkpoint_dir,
            "summaries_dir": summaries_dir,
            "frames_dir": frames_dir,
            "load_from": os.path.join(FLAGS.checkpoint_dir,
                                      load_from_model_name),
            "envs": test_envs,
            "mode": "eval",
        }
        run(settings)

    with open(FLAGS.results_eval_file, "r") as f:
        line = f.readline()
    mean_regrets = [float(rg) for rg in line.rstrip('\n').split(' ')]
    mean_rg_avg = np.mean(mean_regrets)
    print("Avg regret for the model is {}".format(mean_rg_avg))

def test():
    """Re-evaluate the FLAGS.top best validated models on fresh test bandits."""
    if tf.gfile.Exists(FLAGS.results_test_file):
        os.remove(FLAGS.results_test_file)
    with open(FLAGS.results_val_file, "r") as f:
        content = f.readlines()
    lines = [line.rstrip('\n') for line in content]
    games = []
    lrs = []
    gammas = []
    mean_regrets = []
    mean_nb_subopt_armss = []
    for line in lines:
        results = line.split(" ")[1:]
        game, lr, gamma, mean_regret, mean_nb_subopt_arms = [
            r.split("=")[1] for r in results]
        games.append(game)
        lrs.append(lr)
        gammas.append(gamma)
        # Parse metrics as floats so the sort below is numeric, not lexicographic.
        mean_regrets.append(float(mean_regret))
        mean_nb_subopt_armss.append(float(mean_nb_subopt_arms))

    # Lower regret is better, so the best models are the first FLAGS.top
    # indices of the ascending argsort.
    indices_best_n = np.asarray(mean_regrets).argsort()[:FLAGS.top]
    best_lrs = [lrs[i] for i in indices_best_n]
    best_gammas = [gammas[i] for i in indices_best_n]
    best_game = games[0]
    test_envs = TwoArms.get_envs(best_game, FLAGS.nb_test_episodes)
    for i in range(len(indices_best_n)):
        model_name = "best_{}__lr_{}__gamma_{}".format(best_game, best_lrs[i],
                                                       best_gammas[i])
        load_from_model_name = "d_{}__lr_{}__gamma_{}".format(
            best_game, best_lrs[i], best_gammas[i])
        print(model_name)
        checkpoint_dir = os.path.join(FLAGS.checkpoint_dir, model_name)
        summaries_dir = os.path.join(FLAGS.summaries_dir, model_name)
        frames_dir = os.path.join(FLAGS.frames_dir, model_name)
        settings = {
            "lr": best_lrs[i],
            "gamma": best_gammas[i],
            "game": best_game,
            "model_name": model_name,
            "checkpoint_dir": checkpoint_dir,
            "summaries_dir": summaries_dir,
            "frames_dir": frames_dir,
            "load_from": os.path.join(FLAGS.checkpoint_dir,
                                      load_from_model_name),
            "envs": test_envs,
        }
        run(settings)

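# The selection in test() relies on np.argsort sorting ascending, e.g.:
#     np.asarray([0.3, 0.1, 0.2]).argsort()[:2]  ->  array([1, 2])
# so the first FLAGS.top indices point at the models with the lowest mean regret.
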
def run(settings):
    """Build the graph, restore or initialize weights, and run every agent in
    its own thread."""
    recreate_subdirectory_structure(settings)
    tf.reset_default_graph()
    with tf.device("/cpu:0"):
        global_step = tf.Variable(0, dtype=tf.int32, name='global_episodes',
                                  trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=settings["lr"])
        global_network = ACNetwork('global', None)

        num_agents = 1
        agents = []
        envs = []
        for i in range(num_agents):
            if settings["game"] == '11arms':
                this_env = ElevenArms()
            else:
                this_env = TwoArms(settings["game"])
            envs.append(this_env)
        for i in range(num_agents):
            agents.append(Agent(envs[i], i, optimizer, global_step, settings))
        saver = tf.train.Saver(max_to_keep=5)

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        if FLAGS.resume:
            ckpt = tf.train.get_checkpoint_state(settings["checkpoint_dir"])
            try:
                saver.restore(sess, ckpt.model_checkpoint_path)
            except Exception as e:
                print(sys.exc_info()[0])
                print(e)
        else:
            sess.run(tf.global_variables_initializer())

        agent_threads = []
        for agent in agents:
            # Bind the current agent explicitly instead of closing over the
            # loop variable with a bare lambda.
            thread = threading.Thread(target=agent.play,
                                      args=(sess, coord, saver))
            thread.start()
            agent_threads.append(thread)
        coord.join(agent_threads)

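# Illustrative call, mirroring the settings dict built by the callers in this
# module; the hyperparameter values and paths here are made-up examples:
#
#     run({"lr": 1e-3, "gamma": 0.9, "game": FLAGS.game,
#          "model_name": "demo",
#          "checkpoint_dir": "./checkpoints/demo",
#          "summaries_dir": "./summaries/demo",
#          "frames_dir": "./frames/demo"})
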
def run_one_test():
    if FLAGS.train:
        recreate_directory_structure()
        model_name = "one_test"
        checkpoint_dir = os.path.join(FLAGS.checkpoint_dir, model_name)
        summaries_dir = os.path.join(FLAGS.summaries_dir, model_name)
        frames_dir = os.path.join(FLAGS.frames_dir, model_name)
        settings = {
            "lr": FLAGS.lr,
            "gamma": FLAGS.gamma,
            "game": FLAGS.game,
            "model_name": model_name,
            "checkpoint_dir": checkpoint_dir,
            "summaries_dir": summaries_dir,
            "frames_dir": frames_dir,
        }
    else:
        test_envs = TwoArms.get_envs(FLAGS.game, FLAGS.nb_test_episodes)
        model_name = "one_test"
        checkpoint_dir = os.path.join(FLAGS.checkpoint_dir, model_name)
        summaries_dir = os.path.join(FLAGS.summaries_dir, model_name)
        frames_dir = os.path.join(FLAGS.frames_dir, model_name)
        settings = {
            "lr": FLAGS.lr,
            "gamma": FLAGS.gamma,
            "game": FLAGS.game,
            "model_name": model_name,
            "checkpoint_dir": checkpoint_dir,
            "summaries_dir": summaries_dir,
            "frames_dir": frames_dir,
            "load_from": os.path.join(FLAGS.checkpoint_dir, model_name),
            "envs": test_envs,
        }
    run(settings)

def test_hypertune():
    """Re-run the FLAGS.top best validated models on FLAGS.game test bandits."""
    if tf.gfile.Exists(FLAGS.results_test_file):
        os.remove(FLAGS.results_test_file)
    with open(FLAGS.results_val_file, "r") as f:
        content = f.readlines()
    lines = [line.rstrip('\n') for line in content]
    games = []
    lrs = []
    gammas = []
    mean_regrets = []
    mean_nb_subopt_armss = []
    for line in lines:
        results = line.split(" ")[1:]
        game, lr, gamma, mean_regret, mean_nb_subopt_arms = [
            r.split("=")[1] for r in results]
        games.append(game)
        lrs.append(lr)
        gammas.append(gamma)
        mean_regrets.append(float(mean_regret))
        mean_nb_subopt_armss.append(float(mean_nb_subopt_arms))

    indices_best_n = np.asarray(mean_regrets).argsort()[:FLAGS.top]
    best_mean_regrets = [mean_regrets[i] for i in indices_best_n]
    best_lrs = [lrs[i] for i in indices_best_n]
    best_gammas = [gammas[i] for i in indices_best_n]
    best_game = FLAGS.best_model_game
    test_envs = TwoArms.get_envs(FLAGS.game, FLAGS.nb_test_episodes)
    for i in range(len(indices_best_n)):
        model_name = "best_{}__lr_{}__gamma_{}".format(
            best_game, best_lrs[i], best_gammas[i])
        load_from_model_name = "d_{}__lr_{}__gamma_{}".format(
            best_game, best_lrs[i], best_gammas[i])
        print(model_name)
        checkpoint_dir = os.path.join(FLAGS.checkpoint_dir, model_name)
        summaries_dir = os.path.join(FLAGS.summaries_dir, model_name)
        frames_dir = os.path.join(FLAGS.frames_dir, model_name)
        settings = {
            "lr": best_lrs[i],
            "gamma": best_gammas[i],
            "game": FLAGS.game,
            "model_name": model_name,
            "checkpoint_dir": checkpoint_dir,
            "summaries_dir": summaries_dir,
            "frames_dir": frames_dir,
            "load_from": os.path.join(FLAGS.checkpoint_dir,
                                      load_from_model_name),
            "envs": test_envs,
            "mode": "test",
        }
        run(settings)

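# A hedged entry-point sketch; `FLAGS.phase` and its values are assumptions
# used purely for illustration, not flags confirmed to exist in this codebase:
if __name__ == "__main__":
    dispatch = {
        "hypertune": lambda: hypertune(FLAGS.game),
        "validate_hypertune": validate_hypertune,
        "test_hypertune": test_hypertune,
        "baseline": run_baseline,
        "one_test": run_one_test,
    }
    dispatch.get(getattr(FLAGS, "phase", "one_test"), run_one_test)()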