# Imports inferred from the calls below; the flat module names are an
# assumption based on how these snippets reference them. `game` (a string
# naming the environment) and the train() helper are assumed to be defined
# elsewhere in the source module. Several variants below reuse the names
# train_double_dqn/train_dqn: collected into one module, later definitions
# shadow earlier ones, so read them as alternative configurations gathered
# from different source files.
import tensorflow as tf

import atari_dqn
import atari_encoder
import dq_learner
import dq_learner_pc
import rmax_learner


def train_double_dqn(env, num_actions):
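    # Double DQN with a pseudo-count exploration bonus; the "bellemare" in
    # the results path presumably refers to Bellemare et al. (2016),
    # "Unifying Count-Based Exploration and Intrinsic Motivation".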
    results_dir = './results/double_dqn/' + game + '_with_bellemare_comments_bonus_1.0'

    training_epsilon = 0.1
    test_epsilon = 0.05

    # Alternative config for the Toy Montezuma's Revenge encoder, kept
    # commented out as in the source:
    # frame_history = 1
    # enc_func = toy_mr_encoder.encode_toy_mr_state
    # cts_size = (10, 11, 6)   # earlier value: (11, 12, 7)

    frame_history = 4
    cts_size = (42, 42, 8)
    enc_func = atari_encoder.encode_state
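    # Active config: raw Atari frames; cts_size presumably means the CTS
    # density model sees 42x42 frames quantized to 8 intensity levels.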

    dqn = atari_dqn.AtariDQN(frame_history, num_actions, shared_bias=False)
    agent = dq_learner_pc.DQLearner(dqn,
                                    num_actions,
                                    target_copy_freq=10000,
                                    frame_history=frame_history,
                                    epsilon_end=training_epsilon,
                                    state_encoder=enc_func,
                                    cts_size=cts_size,
                                    bonus_beta=1.0)

    train(agent, env, test_epsilon, results_dir, max_episode_steps=None)


def train_dqn(env, num_actions):
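    # Plain single-network DQN baseline on the coin game: double=False and
    # the plain dq_learner module rather than the pseudo-count variant.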
    results_dir = './results/dqn/coin_game'

    training_epsilon = 0.1
    test_epsilon = 0.05

    frame_history = 4
    dqn = atari_dqn.AtariDQN(frame_history, num_actions, shared_bias=False)
    agent = dq_learner.DQLearner(dqn,
                                 num_actions,
                                 target_copy_freq=10000,
                                 epsilon_end=training_epsilon,
                                 double=False,
                                 frame_history=frame_history)
    train(agent, env, test_epsilon, results_dir)


def train_double_dqn(env, num_actions):
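    # Baseline Double DQN for Montezuma's Revenge. Note the much lower
    # epsilon schedule (0.01 train / 0.001 test) than the pseudo-count
    # variants above.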
    results_dir = './results/double_dqn/montezuma_revenge'

    training_epsilon = 0.01
    test_epsilon = 0.001

    frame_history = 4
    dqn = atari_dqn.AtariDQN(frame_history, num_actions)
    agent = dq_learner.DQLearner(dqn, num_actions, frame_history=frame_history, epsilon_end=training_epsilon)

    train(agent, env, test_epsilon, results_dir)


def train_rmax_daqn(env, num_actions):
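    # R-Max-style learner that works over the environment's provided state
    # abstraction (env.abstraction); frame_history is 1 here, unlike the
    # pixel-stacking agents above.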
    results_dir = './results/rmax_daqn/%s_fixed_terminal' % game

    training_epsilon = 0.01
    test_epsilon = 0.001

    frame_history = 1
    dqn = atari_dqn.AtariDQN(frame_history, num_actions)
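    # NOTE: kept as in the source, but this DQN is constructed and never
    # passed to the R-Max learner below, and training_epsilon is unused.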
    agent = rmax_learner.RMaxLearner(env,
                                     env.abstraction,
                                     frame_history=frame_history)

    train(agent, env, test_epsilon, results_dir)


def train_double_dqn(env, num_actions, results_dir, game):
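    # Parameterized variant: results_dir and game come from the caller
    # rather than module-level globals, and game is forwarded to train().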

    training_epsilon = 0.01
    test_epsilon = 0.001

    frame_history = 4
    dqn = atari_dqn.AtariDQN(frame_history, num_actions)
    agent = dq_learner.DQLearner(dqn,
                                 num_actions,
                                 frame_history=frame_history,
                                 epsilon_end=training_epsilon)

    train(agent, env, test_epsilon, results_dir, game)


def train_double_dqn(env, num_actions):
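    # Appears to continue an earlier single-life Montezuma's Revenge run
    # ("part_2"), warm-starting from a saved network checkpoint.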
    results_dir = './results/dqn/%s_single_life_part_2' % game

    training_epsilon = 0.01
    test_epsilon = 0.001

    # frame_history = 1
    frame_history = 4

    dqn = atari_dqn.AtariDQN(frame_history, num_actions)
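    # Pin construction to the first GPU and restore weights from the best
    # network of the prior mr_single_life run.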
    with tf.device('/gpu:0'):
        agent = dq_learner.DQLearner(dqn,
                                     num_actions,
                                     beta=1.0,
                                     frame_history=frame_history,
                                     epsilon_end=training_epsilon,
                                     restore_network_file='results/dqn/mr_single_life/mr_best_net')
    train(agent, env, test_epsilon, results_dir)


def train_dqn(env, num_actions):
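    # "Vanilla" DQN built on the pseudo-count learner class but with
    # double=False and no explicit bonus_beta, so the class's default bonus
    # weight applies.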
    results_dir = './results/dqn/' + game + '_vanilla'

    training_epsilon = 0.1
    test_epsilon = 0.05

    # frame_history = 1
    # enc_func = toy_mr_encoder.encode_toy_mr_state
    # cts_size = (11, 12, 7)

    frame_history = 4
    cts_size = (42, 42, 8)
    enc_func = atari_encoder.encode_state

    dqn = atari_dqn.AtariDQN(frame_history, num_actions, shared_bias=False)
    agent = dq_learner_pc.DQLearner(dqn,
                                    num_actions,
                                    target_copy_freq=10000,
                                    epsilon_end=training_epsilon,
                                    double=False,
                                    frame_history=frame_history,
                                    state_encoder=enc_func,
                                    cts_size=cts_size)
    train(agent, env, test_epsilon, results_dir)
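

# A minimal, hypothetical driver showing how one of these trainers might be
# invoked. make_env() is a placeholder for however the surrounding repo
# actually constructs its environment, and env.num_actions is an assumed
# attribute; wire both to the real project before running.
def make_env(game_name):
    # Placeholder: the real repo builds an ALE/gym-style environment here.
    raise NotImplementedError('replace with the real environment factory')


if __name__ == '__main__':
    game = 'montezuma_revenge'   # module-level name several trainers read
    env = make_env(game)
    train_double_dqn(env, env.num_actions)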