Example #1
def main():
    import config
    # make env
    g_config = config.config()
    g_config.env_name = "Pong2p-v0"
    env = gym.make(g_config.env_name)
    env = MaxAndSkipEnv(env, skip=g_config.skip_frame)
    env = PreproWrapper(env,
                        prepro=greyscale,
                        shape=(80, 80, 1),
                        overwrite_render=g_config.overwrite_render)

    # exploration strategy
    # you may want to modify this schedule
    exp_schedule = LinearExploration(env, g_config.eps_begin, g_config.eps_end,
                                     g_config.eps_nsteps)

    # learning rate schedule
    # you may want to modify this schedule
    lr_schedule = LinearSchedule(g_config.lr_begin, g_config.lr_end,
                                 g_config.lr_nsteps)

    # train model
    # model_0 = dqns.AdvantageQN(env, config.config(), name="Adv_A")
    # model_1 = dqns.AdvantageQN(env, config.config(), name="Adv_B")
    model_0 = dqns.NatureQN(env, config.config(), name="Nature_A")
    model_1 = dqns.NatureQN(env, config.config(), name="Nature_B")
    trainer = SelfPlayTrainer(model_0, model_1, env, g_config)
    trainer.run_parallel_models(exp_schedule, lr_schedule, True, True)
Example #2
def main():
    import config
    g_config = config.config()

    # make env
    env = gym.make("Pong-v0")
    env = MaxAndSkipEnv(env, skip=g_config.skip_frame)
    env = PreproWrapper(env,
                        prepro=greyscale,
                        shape=(80, 80, 1),
                        overwrite_render=g_config.overwrite_render)

    # exploration strategy
    # you may want to modify this schedule
    exp_schedule = LinearExploration(env, g_config.eps_begin, g_config.eps_end,
                                     g_config.eps_nsteps)

    # learning rate schedule
    # you may want to modify this schedule
    lr_schedule = LinearSchedule(g_config.lr_begin, g_config.lr_end,
                                 g_config.lr_nsteps)

    # train model
    # model = AdvantageQN(env, config.config(), name="SingleADV")
    model = NatureQN(env, config.config(), name="SingleNatureQN")
    model.run(exp_schedule, lr_schedule)
Example #3
def single_train():
    import config
    # make env
    g_config = config.config()
    g_config.env_name = "Pong2p-v0"
    env = gym.make(g_config.env_name)
    env = MaxAndSkipEnv(env, skip=g_config.skip_frame)
    env = PreproWrapper(env,
                        prepro=greyscale,
                        shape=(80, 80, 1),
                        overwrite_render=g_config.overwrite_render)

    # exploration strategy
    # you may want to modify this schedule
    exp_schedule = LinearExploration(env, g_config.eps_begin, g_config.eps_end,
                                     g_config.eps_nsteps)

    # learning rate schedule
    # you may want to modify this schedule
    lr_schedule = LinearSchedule(g_config.lr_begin, g_config.lr_end,
                                 g_config.lr_nsteps)

    # train model
    model_0 = dqns.AdvantageQN(env, config.config(), name="Adv_Single")

    # Fixed opponent: near-zero epsilon, so it plays (almost) greedily from its loaded weights
    model_1 = dqns.AdvantageQN(env, config.config(), name="Adv_FixedOpp")
    exp_schedule1 = LinearExploration(env, 0.00001, 0.00000001,
                                      g_config.eps_nsteps)
    """
    model_1 = dqns.AdvantageQN(env, config.config(), name="Random")
    exp_schedule1 = LinearExploration(env, 1,
                                     1, g_config.eps_nsteps)
    """

    model_0.initialize()
    model_1.load("trained_models/03_1521/Adv_A/model.weights/model-250244")

    trainer = FixedTargetTrainer(model_0, model_1, env, g_config)

    trainer.record(exp_schedule)  # record one at beginning
    trainer.train(exp_schedule, lr_schedule, exp_schedule1)
    trainer.record(exp_schedule)  # record one at end
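
These examples rely on the linear annealing behaviour of LinearExploration / LinearSchedule: the fixed opponent above gets a near-zero epsilon so it plays (almost) greedily from its loaded weights, while the commented-out variant pins epsilon at 1 to obtain a uniformly random opponent. The schedule classes themselves are not shown here; the sketch below is only an assumed reading of their semantics (interpolate linearly from the begin value to the end value over nsteps, then hold the end value).

class LinearScheduleSketch(object):
    """Minimal sketch of the assumed annealing semantics (not the repository's code)."""

    def __init__(self, begin, end, nsteps):
        self.begin = begin
        self.end = end
        self.nsteps = nsteps
        self.epsilon = begin

    def update(self, t):
        # Linear interpolation from begin to end, clamped once t >= nsteps.
        frac = min(float(t) / self.nsteps, 1.0)
        self.epsilon = self.begin + frac * (self.end - self.begin)

Under that reading, LinearExploration(env, 1, 1, g_config.eps_nsteps) keeps epsilon at 1 for the whole run, which is what makes the "Random" opponent purely random.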
Example #4
"""
Use a different architecture for the Atari game. Please report the final result.
Feel free to change the configuration. If so, please report your hyperparameters.
"""
if __name__ == '__main__':
    import config
    g_config = config.config()

    # make env
    env = gym.make("Pong-v0")
    env = MaxAndSkipEnv(env, skip=g_config.skip_frame)
    env = PreproWrapper(env,
                        prepro=greyscale,
                        shape=(80, 80, 1),
                        overwrite_render=g_config.overwrite_render)

    # exploration strategy
    # you may want to modify this schedule
    exp_schedule = LinearExploration(env, g_config.eps_begin, g_config.eps_end,
                                     g_config.eps_nsteps)

    # learning rate schedule
    # you may want to modify this schedule
    lr_schedule = LinearSchedule(g_config.lr_begin, g_config.lr_end,
                                 g_config.lr_nsteps)

    # train model
    model = AdvantageQN(env, g_config)
    model.run(exp_schedule, lr_schedule)
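
Example #4 answers the "different architecture" prompt with AdvantageQN. If that class follows the common dueling/advantage decomposition (an assumption, not something these snippets confirm), its head combines a scalar state value with per-action advantages roughly as follows:

import numpy as np

def dueling_q_values(state_value, advantages):
    # Generic dueling combination: Q(s, a) = V(s) + A(s, a) - mean_a A(s, a).
    # Subtracting the mean advantage keeps the value/advantage split identifiable.
    return state_value + advantages - np.mean(advantages, axis=-1, keepdims=True)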
Example #5
def run_games():
    g_config = config.config()
    g_config.env_name = "Pong2p-v0"
    env = gym.make(g_config.env_name)
    env = MaxAndSkipEnv(env, skip=g_config.skip_frame)
    env = PreproWrapper(env,
                        prepro=greyscale,
                        shape=(80, 80, 1),
                        overwrite_render=g_config.overwrite_render)
    exp_schedule = LinearExploration(env, g_config.eps_begin, g_config.eps_end,
                                     g_config.eps_nsteps)
    lr_schedule = LinearSchedule(g_config.lr_begin, g_config.lr_end,
                                 g_config.lr_nsteps)

    evaluator = Evaluator(env, g_config)
    csv_file = open(evaluator.config.output_path + "results.csv",
                    mode='w',
                    newline="")
    csv_res = csv.writer(csv_file)
    csv_res.writerow([
        *[m + "_0" for m in model_info_names],
        *[m + "_1" for m in model_info_names], "win_0", "win_1"
    ])

    def enumerate_models(model_dir,
                         model_nums,
                         name,
                         m_class=dqns.AdvantageQN):
        # Load one checkpoint per step number in model_nums, tagging each model
        # with its checkpoint number, a starting Elo of 0, and its directory.
        models = []
        for m in model_nums:
            cur_model = m_class(env, g_config, name=name)
            cur_model.num = m
            cur_model.elo = 0
            cur_model.model_dir = model_dir
            cur_model.load(model_dir + "-" + str(m))
            models.append(cur_model)
        return tuple(models)

    pairs = []

    def compatable_with(model_set_a, model_sets_b):
        # Register every evaluation pairing between model_set_a and the models
        # in each set of model_sets_b.
        msb = itertools.chain.from_iterable(model_sets_b)
        pairs.extend(itertools.product(model_set_a, msb))

    # Now to specify the models that are available

    # Scoring one game takes roughly 1.5 min * (15 / 100) ~= 15 sec,
    # so one hour ~= 240 games -> 480 game results.
    rounds = 0
    models = []

    def first_run():
        # First goal: get scores (25 games each) for
        # 1 single-play, 2 self-play @ 250k
        # 1 single-play, 2 self-play @ 1M
        # 1 single-play, 2 self-play @ 2.5M
        # 2 single-play, 4 self-play at the end of training
        # 15 models in total

        model_dir = "trained_models/{}/model.weights/model"

        single_play = enumerate_models(model_dir.format("02_2204/SingleADV"),
                                       [4011594, 4764484], "Single")
        self_play0A = enumerate_models(model_dir.format("02_2205/Adv_A"),
                                       [4006694, 4757864], "Adv0A")
        self_play0B = enumerate_models(model_dir.format("02_2205/Adv_B"),
                                       [4006694, 4757864], "Adv0B")
        self_play1A = enumerate_models(model_dir.format("02_2209/Adv_A"),
                                       [4005221, 4756947], "Adv1A")
        self_play1B = enumerate_models(model_dir.format("02_2209/Adv_B"),
                                       [4005221, 4756947], "Adv1B")
        self_play0 = self_play0A + self_play0B
        self_play1 = self_play1A + self_play1B

        compatable_with(single_play, [self_play0, self_play1])
        compatable_with(self_play0, [single_play, self_play1])
        compatable_with(self_play1, [single_play, self_play0])

        nonlocal models
        nonlocal rounds
        models = single_play + self_play0 + self_play1
        rounds = 5

    def second_run():
        model_dir = "trained_models/{}/model.weights/model"

        single_play = enumerate_models(model_dir.format("03_1501/SingleADV"),
                                       [250272, 1001582, 2506140, 4763791],
                                       "Single")
        single_play += enumerate_models(model_dir.format("02_2204/SingleADV"),
                                        [4764484], "Single")

        single_play1 = enumerate_models(
            model_dir.format("03_2349/SingleNatureQN"),
            [250068, 1002011, 2505567, 4764637], "SingleDQN", dqns.NatureQN)
        single_play2 = enumerate_models(
            model_dir.format("04_0232/SingleNatureQN"),
            [250360, 1001844, 2508136, 4766454], "SingleDQN", dqns.NatureQN)

        self_play0A = enumerate_models(model_dir.format("02_2205/Adv_A"),
                                       [4757864], "Adv0A")
        self_play0B = enumerate_models(model_dir.format("02_2205/Adv_B"),
                                       [4757864], "Adv0B")
        self_play0 = self_play0A + self_play0B

        self_play1A = enumerate_models(model_dir.format("02_2209/Adv_A"),
                                       [4756947], "Adv1A")
        self_play1B = enumerate_models(model_dir.format("02_2209/Adv_B"),
                                       [4756947], "Adv1B")
        self_play1 = self_play1A + self_play1B

        self_play2A = enumerate_models(model_dir.format("03_1520/Adv_A"),
                                       [250020, 1001335, 2505766], "Adv2A")
        self_play2B = enumerate_models(model_dir.format("03_1520/Adv_B"),
                                       [250020, 1001335, 2505766], "Adv2B")
        self_play2 = self_play2A + self_play2B

        self_play3A = enumerate_models(model_dir.format("03_1521/Adv_A"),
                                       [250244, 1002194, 2505204], "Adv3A")
        self_play3B = enumerate_models(model_dir.format("03_1521/Adv_B"),
                                       [250244, 1002194, 2505204], "Adv3B")
        self_play3 = self_play3A + self_play3B

        self_play4A = enumerate_models(model_dir.format("03_2357/Nature_A"),
                                       [250101, 1001653, 2503025, 4758399],
                                       "Nature4A", dqns.NatureQN)
        self_play4B = enumerate_models(model_dir.format("03_2357/Nature_B"),
                                       [250101, 1001653, 2503025, 4758399],
                                       "Nature4B", dqns.NatureQN)
        self_play4 = self_play4A + self_play4B

        self_play5 = enumerate_models(model_dir.format("04_1006/Adv_Single"),
                                      [250047, 1000867, 2501982, 4753393],
                                      "ADV_vs_Random")
        self_play6 = enumerate_models(model_dir.format("04_1009/Adv_Single"),
                                      [250016, 1000902, 2501784, 4753522],
                                      "ADV_vs_250k")

        all_sets = {
            single_play, single_play1, single_play2, self_play0, self_play1,
            self_play2, self_play3
        }

        compatable_with(single_play,
                        all_sets - {single_play, self_play0, self_play1})
        compatable_with(self_play0,
                        all_sets - {single_play, self_play0, self_play1})
        compatable_with(self_play1,
                        all_sets - {single_play, self_play0, self_play1})
        compatable_with(single_play1, all_sets - {single_play1})
        compatable_with(single_play2, all_sets - {single_play2})
        compatable_with(self_play2, all_sets - {self_play2})
        compatable_with(self_play3, all_sets - {self_play3})

        nonlocal models
        nonlocal rounds
        models = list(itertools.chain.from_iterable(all_sets))
        rounds = 1

    def third_run():
        model_dir = "trained_models/{}/model.weights/model"

        single_play = enumerate_models(model_dir.format("03_1501/SingleADV"),
                                       [250272, 1001582, 2506140, 4763791],
                                       "Single")
        single_play += enumerate_models(model_dir.format("02_2204/SingleADV"),
                                        [4764484], "Single")

        single_play1 = enumerate_models(
            model_dir.format("03_2349/SingleNatureQN"),
            [250068, 1002011, 2505567, 4764637], "SingleDQN0", dqns.NatureQN)
        single_play2 = enumerate_models(
            model_dir.format("04_0232/SingleNatureQN"),
            [250360, 1001844, 2508136, 4766454], "SingleDQN1", dqns.NatureQN)

        self_play0A = enumerate_models(model_dir.format("02_2205/Adv_A"),
                                       [4757864], "Adv0A")
        self_play0B = enumerate_models(model_dir.format("02_2205/Adv_B"),
                                       [4757864], "Adv0B")
        self_play0 = self_play0A + self_play0B

        self_play1A = enumerate_models(model_dir.format("02_2209/Adv_A"),
                                       [4756947], "Adv1A")
        self_play1B = enumerate_models(model_dir.format("02_2209/Adv_B"),
                                       [4756947], "Adv1B")
        self_play1 = self_play1A + self_play1B

        self_play2A = enumerate_models(model_dir.format("03_1520/Adv_A"),
                                       [250020, 1001335, 2505766], "Adv2A")
        self_play2B = enumerate_models(model_dir.format("03_1520/Adv_B"),
                                       [250020, 1001335, 2505766], "Adv2B")
        self_play2 = self_play2A + self_play2B

        self_play3A = enumerate_models(model_dir.format("03_1521/Adv_A"),
                                       [250244, 1002194, 2505204], "Adv3A")
        self_play3B = enumerate_models(model_dir.format("03_1521/Adv_B"),
                                       [250244, 1002194, 2505204], "Adv3B")
        self_play3 = self_play3A + self_play3B

        self_play4A = enumerate_models(model_dir.format("03_2357/Nature_A"),
                                       [250101, 1001653, 2503205, 4758399],
                                       "Nature4A", dqns.NatureQN)
        self_play4B = enumerate_models(model_dir.format("03_2357/Nature_B"),
                                       [250101, 1001653, 2503205, 4758399],
                                       "Nature4B", dqns.NatureQN)
        self_play4 = self_play4A + self_play4B

        self_play5 = enumerate_models(model_dir.format("04_1006/Adv_Single"),
                                      [250047, 1000867, 2501982, 4753393],
                                      "ADV_vs_Random")
        self_play6 = enumerate_models(model_dir.format("04_1109/Adv_Single"),
                                      [250016, 1000902, 2501784, 4753522],
                                      "ADV_vs_250k")

        all_sets = {
            single_play, single_play1, single_play2, self_play0, self_play1,
            self_play2, self_play3, self_play4, self_play5, self_play6
        }
        new_sets = {self_play4, self_play5, self_play6}

        for s in all_sets - new_sets:
            compatable_with(s, new_sets)
        for s in new_sets:
            compatable_with(s, all_sets - {s})

        nonlocal models
        nonlocal rounds
        models = list(itertools.chain.from_iterable(all_sets))
        rounds = 1

    # Select which evaluation run to execute
    third_run()

    # now to actually score the games
    results = []

    for i in range(rounds):
        for m0, m1 in pairs:
            score_0, score_1 = evaluator.evaluate(m0, m1)
            update_elo(m0, m1, 30, score_0, score_1)
            info = [*model_info(m0), *model_info(m1), score_0, score_1]
            print(info)
            print(m0.elo, m1.elo)
            csv_res.writerow(info)
            results.append([m0, m1, score_0, score_1])
    csv_file.close()
    return results, evaluator.config.output_path
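
Example #5 adjusts ratings through update_elo(m0, m1, 30, score_0, score_1), which is not defined in these snippets. Below is a minimal sketch of a standard Elo update consistent with that call; the K-factor of 30 and the score_0 / score_1 game counts come from the example, everything else is an assumption.

def update_elo(m0, m1, k, score_0, score_1):
    # Expected score for m0 under the standard Elo formula.
    expected_0 = 1.0 / (1.0 + 10.0 ** ((m1.elo - m0.elo) / 400.0))
    total = score_0 + score_1
    if total == 0:
        return
    # Normalize the raw win counts to an actual score in [0, 1].
    actual_0 = float(score_0) / total
    m0.elo += k * (actual_0 - expected_0)
    m1.elo += k * ((1.0 - actual_0) - (1.0 - expected_0))

Every model enters the tournament with elo = 0 (set in enumerate_models), so only rating differences are meaningful.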