def main():
    import config

    # make env
    g_config = config.config()
    g_config.env_name = "Pong2p-v0"
    env = gym.make(g_config.env_name)
    env = MaxAndSkipEnv(env, skip=g_config.skip_frame)
    env = PreproWrapper(env, prepro=greyscale, shape=(80, 80, 1),
                        overwrite_render=g_config.overwrite_render)

    # exploration strategy
    # you may want to modify this schedule
    exp_schedule = LinearExploration(env, g_config.eps_begin,
                                     g_config.eps_end, g_config.eps_nsteps)

    # learning rate schedule
    # you may want to modify this schedule
    lr_schedule = LinearSchedule(g_config.lr_begin, g_config.lr_end,
                                 g_config.lr_nsteps)

    # train model
    # model_0 = dqns.AdvantageQN(env, config.config(), name="Adv_A")
    # model_1 = dqns.AdvantageQN(env, config.config(), name="Adv_B")
    model_0 = dqns.NatureQN(env, config.config(), name="Nature_A")
    model_1 = dqns.NatureQN(env, config.config(), name="Nature_B")
    trainer = SelfPlayTrainer(model_0, model_1, env, g_config)
    trainer.run_parallel_models(exp_schedule, lr_schedule, True, True)
def main():
    import config

    g_config = config.config()

    # make env
    env = gym.make("Pong-v0")
    env = MaxAndSkipEnv(env, skip=g_config.skip_frame)
    env = PreproWrapper(env, prepro=greyscale, shape=(80, 80, 1),
                        overwrite_render=g_config.overwrite_render)

    # exploration strategy
    # you may want to modify this schedule
    exp_schedule = LinearExploration(env, g_config.eps_begin,
                                     g_config.eps_end, g_config.eps_nsteps)

    # learning rate schedule
    # you may want to modify this schedule
    lr_schedule = LinearSchedule(g_config.lr_begin, g_config.lr_end,
                                 g_config.lr_nsteps)

    # train model
    # model = AdvantageQN(env, config.config(), name="SingleADV")
    model = NatureQN(env, config.config(), name="SingleNatureQN")
    model.run(exp_schedule, lr_schedule)
def single_train():
    import config

    # make env
    g_config = config.config()
    g_config.env_name = "Pong2p-v0"
    env = gym.make(g_config.env_name)
    env = MaxAndSkipEnv(env, skip=g_config.skip_frame)
    env = PreproWrapper(env, prepro=greyscale, shape=(80, 80, 1),
                        overwrite_render=g_config.overwrite_render)

    # exploration strategy
    # you may want to modify this schedule
    exp_schedule = LinearExploration(env, g_config.eps_begin,
                                     g_config.eps_end, g_config.eps_nsteps)

    # learning rate schedule
    # you may want to modify this schedule
    lr_schedule = LinearSchedule(g_config.lr_begin, g_config.lr_end,
                                 g_config.lr_nsteps)

    # train model: model_0 learns while model_1 acts as a fixed opponent
    model_0 = dqns.AdvantageQN(env, config.config(), name="Adv_Single")
    model_1 = dqns.AdvantageQN(env, config.config(), name="Adv_FixedOpp")
    # near-greedy schedule for the fixed opponent
    exp_schedule1 = LinearExploration(env, 0.00001, 0.00000001, g_config.eps_nsteps)
    # alternative: play against a purely random opponent
    # model_1 = dqns.AdvantageQN(env, config.config(), name="Random")
    # exp_schedule1 = LinearExploration(env, 1, 1, g_config.eps_nsteps)

    model_0.initialize()
    model_1.load("trained_models/03_1521/Adv_A/model.weights/model-250244")

    trainer = FixedTargetTrainer(model_0, model_1, env, g_config)
    trainer.record(exp_schedule)  # record one at beginning
    trainer.train(exp_schedule, lr_schedule, exp_schedule1)
    trainer.record(exp_schedule)  # record one at end
""" Use a different architecture for the Atari game. Please report the final result. Feel free to change the configuration. If so, please report your hyperparameters. """ if __name__ == '__main__': import config g_config = config.config() # make env env = gym.make("Pong-v0") env = MaxAndSkipEnv(env, skip=g_config.skip_frame) env = PreproWrapper(env, prepro=greyscale, shape=(80, 80, 1), overwrite_render=g_config.overwrite_render) # exploration strategy # you may want to modify this schedule exp_schedule = LinearExploration(env, g_config.eps_begin, g_config.eps_end, g_config.eps_nsteps) # you may want to modify this schedule # learning rate schedule lr_schedule = LinearSchedule(g_config.lr_begin, g_config.lr_end, g_config.lr_nsteps) # train model model = AdvantageQN(env, config) model.run(exp_schedule, lr_schedule)
def run_games():
    g_config = config.config()
    g_config.env_name = "Pong2p-v0"
    env = gym.make(g_config.env_name)
    env = MaxAndSkipEnv(env, skip=g_config.skip_frame)
    env = PreproWrapper(env, prepro=greyscale, shape=(80, 80, 1),
                        overwrite_render=g_config.overwrite_render)

    exp_schedule = LinearExploration(env, g_config.eps_begin,
                                     g_config.eps_end, g_config.eps_nsteps)
    lr_schedule = LinearSchedule(g_config.lr_begin, g_config.lr_end,
                                 g_config.lr_nsteps)

    evaluator = Evaluator(env, g_config)

    csv_file = open(evaluator.config.output_path + "results.csv", mode='w', newline="")
    csv_res = csv.writer(csv_file)
    csv_res.writerow([
        *[m + "_0" for m in model_info_names],
        *[m + "_1" for m in model_info_names],
        "win_0", "win_1"
    ])

    def enumerate_models(model_dir, model_nums, name, m_class=dqns.AdvantageQN):
        models = []
        for m in model_nums:
            cur_model = m_class(env, g_config, name=name)
            cur_model.num = m
            cur_model.elo = 0
            cur_model.model_dir = model_dir
            cur_model.load(model_dir + "-" + str(m))
            models.append(cur_model)
        return tuple(models)

    pairs = []

    def compatable_with(model_set_a, model_sets_b):
        msb = itertools.chain.from_iterable(model_sets_b)
        pairs.extend(itertools.product(model_set_a, msb))

    # Now to specify the models that are available.
    # scoring 1 game takes 1.5 min * (15 / 100) ~= 15 sec
    # one hour = 240 games -> 480 game results
    rounds = 0
    models = []

    def first_run():
        # ok, first goal is to get scores (25 games each) for
        # 1 single play, 2 self-play @ 250k
        # 1 single play, 2 self-play @ 1M
        # 1 single play, 2 self-play @ 2.5M
        # 2 single play, 4 self-play ends
        # total 15 models
        model_dir = "trained_models/{}/model.weights/model"
        single_play = enumerate_models(model_dir.format("02_2204/SingleADV"),
                                       [4011594, 4764484], "Single")
        self_play0A = enumerate_models(model_dir.format("02_2205/Adv_A"),
                                       [4006694, 4757864], "Adv0A")
        self_play0B = enumerate_models(model_dir.format("02_2205/Adv_B"),
                                       [4006694, 4757864], "Adv0B")
        self_play1A = enumerate_models(model_dir.format("02_2209/Adv_A"),
                                       [4005221, 4756947], "Adv1A")
        self_play1B = enumerate_models(model_dir.format("02_2209/Adv_B"),
                                       [4005221, 4756947], "Adv1B")
        self_play0 = self_play0A + self_play0B
        self_play1 = self_play1A + self_play1B

        compatable_with(single_play, [self_play0, self_play1])
        compatable_with(self_play0, [single_play, self_play1])
        compatable_with(self_play1, [single_play, self_play0])

        nonlocal models
        nonlocal rounds
        models = single_play + self_play0 + self_play1
        rounds = 5

    def second_run():
        model_dir = "trained_models/{}/model.weights/model"
        single_play = enumerate_models(model_dir.format("03_1501/SingleADV"),
                                       [250272, 1001582, 2506140, 4763791], "Single")
        single_play += enumerate_models(model_dir.format("02_2204/SingleADV"),
                                        [4764484], "Single")
        single_play1 = enumerate_models(
            model_dir.format("03_2349/SingleNatureQN"),
            [250068, 1002011, 2505567, 4764637], "SingleDQN", dqns.NatureQN)
        single_play2 = enumerate_models(
            model_dir.format("04_0232/SingleNatureQN"),
            [250360, 1001844, 2508136, 4766454], "SingleDQN", dqns.NatureQN)

        self_play0A = enumerate_models(model_dir.format("02_2205/Adv_A"),
                                       [4757864], "Adv0A")
        self_play0B = enumerate_models(model_dir.format("02_2205/Adv_B"),
                                       [4757864], "Adv0B")
        self_play0 = self_play0A + self_play0B
        self_play1A = enumerate_models(model_dir.format("02_2209/Adv_A"),
                                       [4756947], "Adv1A")
        self_play1B = enumerate_models(model_dir.format("02_2209/Adv_B"),
                                       [4756947], "Adv1B")
        self_play1 = self_play1A + self_play1B
        self_play2A = enumerate_models(model_dir.format("03_1520/Adv_A"),
                                       [250020, 1001335, 2505766], "Adv2A")
        self_play2B = enumerate_models(model_dir.format("03_1520/Adv_B"),
                                       [250020, 1001335, 2505766], "Adv2B")
        self_play2 = self_play2A + self_play2B
        self_play3A = enumerate_models(model_dir.format("03_1521/Adv_A"),
                                       [250244, 1002194, 2505204], "Adv3A")
        self_play3B = enumerate_models(model_dir.format("03_1521/Adv_B"),
                                       [250244, 1002194, 2505204], "Adv3B")
        self_play3 = self_play3A + self_play3B
        self_play4A = enumerate_models(model_dir.format("03_2357/Nature_A"),
                                       [250101, 1001653, 2503025, 4758399],
                                       "Nature4A", dqns.NatureQN)
        self_play4B = enumerate_models(model_dir.format("03_2357/Nature_B"),
                                       [250101, 1001653, 2503025, 4758399],
                                       "Nature4B", dqns.NatureQN)
        self_play4 = self_play4A + self_play4B
        self_play5 = enumerate_models(model_dir.format("04_1006/Adv_Single"),
                                      [250047, 1000867, 2501982, 4753393], "ADV_vs_Random")
        self_play6 = enumerate_models(model_dir.format("04_1009/Adv_Single"),
                                      [250016, 1000902, 2501784, 4753522], "ADV_vs_250k")

        all_sets = {
            single_play, single_play1, single_play2,
            self_play0, self_play1, self_play2, self_play3
        }
        compatable_with(single_play, all_sets - {single_play, self_play0, self_play1})
        compatable_with(self_play0, all_sets - {single_play, self_play0, self_play1})
        compatable_with(self_play1, all_sets - {single_play, self_play0, self_play1})
        compatable_with(single_play1, all_sets - {single_play1})
        compatable_with(single_play2, all_sets - {single_play2})
        compatable_with(self_play2, all_sets - {self_play2})
        compatable_with(self_play3, all_sets - {self_play3})

        nonlocal models
        nonlocal rounds
        models = list(itertools.chain.from_iterable(all_sets))
        rounds = 1

    def third_run():
        model_dir = "trained_models/{}/model.weights/model"
        single_play = enumerate_models(model_dir.format("03_1501/SingleADV"),
                                       [250272, 1001582, 2506140, 4763791], "Single")
        single_play += enumerate_models(model_dir.format("02_2204/SingleADV"),
                                        [4764484], "Single")
        single_play1 = enumerate_models(
            model_dir.format("03_2349/SingleNatureQN"),
            [250068, 1002011, 2505567, 4764637], "SingleDQN0", dqns.NatureQN)
        single_play2 = enumerate_models(
            model_dir.format("04_0232/SingleNatureQN"),
            [250360, 1001844, 2508136, 4766454], "SingleDQN1", dqns.NatureQN)

        self_play0A = enumerate_models(model_dir.format("02_2205/Adv_A"),
                                       [4757864], "Adv0A")
        self_play0B = enumerate_models(model_dir.format("02_2205/Adv_B"),
                                       [4757864], "Adv0B")
        self_play0 = self_play0A + self_play0B
        self_play1A = enumerate_models(model_dir.format("02_2209/Adv_A"),
                                       [4756947], "Adv1A")
        self_play1B = enumerate_models(model_dir.format("02_2209/Adv_B"),
                                       [4756947], "Adv1B")
        self_play1 = self_play1A + self_play1B
        self_play2A = enumerate_models(model_dir.format("03_1520/Adv_A"),
                                       [250020, 1001335, 2505766], "Adv2A")
        self_play2B = enumerate_models(model_dir.format("03_1520/Adv_B"),
                                       [250020, 1001335, 2505766], "Adv2B")
        self_play2 = self_play2A + self_play2B
        self_play3A = enumerate_models(model_dir.format("03_1521/Adv_A"),
                                       [250244, 1002194, 2505204], "Adv3A")
        self_play3B = enumerate_models(model_dir.format("03_1521/Adv_B"),
                                       [250244, 1002194, 2505204], "Adv3B")
        self_play3 = self_play3A + self_play3B
        self_play4A = enumerate_models(model_dir.format("03_2357/Nature_A"),
                                       [250101, 1001653, 2503205, 4758399],
                                       "Nature4A", dqns.NatureQN)
        self_play4B = enumerate_models(model_dir.format("03_2357/Nature_B"),
                                       [250101, 1001653, 2503205, 4758399],
                                       "Nature4B", dqns.NatureQN)
        self_play4 = self_play4A + self_play4B
        self_play5 = enumerate_models(model_dir.format("04_1006/Adv_Single"),
                                      [250047, 1000867, 2501982, 4753393], "ADV_vs_Random")
        self_play6 = enumerate_models(model_dir.format("04_1109/Adv_Single"),
                                      [250016, 1000902, 2501784, 4753522], "ADV_vs_250k")

        all_sets = {
            single_play, single_play1, single_play2,
            self_play0, self_play1, self_play2, self_play3,
            self_play4, self_play5, self_play6
        }
        new_sets = {self_play4, self_play5, self_play6}
        for s in all_sets - new_sets:
            compatable_with(s, new_sets)
        for s in new_sets:
            compatable_with(s, all_sets - {s})

        nonlocal models
        nonlocal rounds
        models = list(itertools.chain.from_iterable(all_sets))
        rounds = 1

    # Which evaluation run to score
    third_run()

    # now to actually score the games
    results = []
    for i in range(rounds):
        for m0, m1 in pairs:
            score_0, score_1 = evaluator.evaluate(m0, m1)
            update_elo(m0, m1, 30, score_0, score_1)
            info = [*model_info(m0), *model_info(m1), score_0, score_1]
            print(info)
            print(m0.elo, m1.elo)
            csv_res.writerow(info)
            results.append([m0, m1, score_0, score_1])

    csv_file.close()
    return results, evaluator.config.output_path
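# `update_elo(m0, m1, 30, score_0, score_1)` above is assumed to apply a
# standard Elo update with K-factor 30, treating each model's share of the
# points scored in the pairing as its observed result. The helper below is a
# hypothetical reference sketch under that assumption; the real `update_elo`
# lives elsewhere in the project and may differ.
def _update_elo_sketch(m0, m1, k, score_0, score_1):
    # expected score for m0 given the current ratings
    expected_0 = 1.0 / (1.0 + 10 ** ((m1.elo - m0.elo) / 400.0))
    # observed score: fraction of points won by m0 in this pairing
    total = score_0 + score_1
    actual_0 = 0.5 if total == 0 else score_0 / total
    # zero-sum rating adjustment
    delta = k * (actual_0 - expected_0)
    m0.elo += delta
    m1.elo -= delta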