def train_double_dqn(env, num_actions):
    """Train a Double DQN agent with a CTS pseudo-count exploration bonus.

    NOTE(review): reads the module-level ``game`` name to build the results
    path — confirm it is set before this is called.
    """
    # Previously-used Toy MR settings, kept for reference:
    #   frame_history = 1; enc_func = toy_mr_encoder.encode_toy_mr_state
    #   cts_size = (11, 12, 7) or (10, 11, 6)
    results_dir = './results/double_dqn/' + game + '_with_bellemare_comments_bonus_1.0'
    eps_train = 0.1
    eps_test = 0.05
    history_len = 4
    cts_dims = (42, 42, 8)  # CTS density-model input size — presumably (H, W, bins); confirm
    encoder = atari_encoder.encode_state
    network = atari_dqn.AtariDQN(history_len, num_actions, shared_bias=False)
    learner = dq_learner_pc.DQLearner(
        network, num_actions,
        target_copy_freq=10000,
        frame_history=history_len,
        epsilon_end=eps_train,
        state_encoder=encoder,
        cts_size=cts_dims,
        bonus_beta=1.0)
    train(learner, env, eps_test, results_dir, max_episode_steps=None)
def train_dqn(env, num_actions):
    """Train a vanilla (non-double) DQN agent on the coin game."""
    eps_train = 0.1
    eps_test = 0.05
    history_len = 4
    network = atari_dqn.AtariDQN(history_len, num_actions, shared_bias=False)
    learner = dq_learner.DQLearner(
        network, num_actions,
        target_copy_freq=10000,
        epsilon_end=eps_train,
        double=False,  # plain DQN target, not Double DQN
        frame_history=history_len)
    train(learner, env, eps_test, './results/dqn/coin_game')
def train_double_dqn(env, num_actions):
    """Train a Double DQN agent on Montezuma's Revenge."""
    history_len = 4
    eps_train = 0.01
    eps_test = 0.001
    network = atari_dqn.AtariDQN(history_len, num_actions)
    learner = dq_learner.DQLearner(
        network, num_actions,
        frame_history=history_len,
        epsilon_end=eps_train)
    train(learner, env, eps_test, './results/double_dqn/montezuma_revenge')
def train_rmax_daqn(env, num_actions):
    """Train an R-max abstraction learner on ``env`` using ``env.abstraction``.

    Fix: removed the unused local ``training_epsilon = 0.01`` — RMaxLearner is
    not given an epsilon and the value was never read.

    NOTE(review): reads the module-level ``game`` name for the results path.
    """
    results_dir = './results/rmax_daqn/%s_fixed_terminal' % game
    test_epsilon = 0.001
    frame_history = 1
    # NOTE(review): this network is constructed but never handed to the learner.
    # It looks like dead code; kept because AtariDQN construction may register
    # graph state as a side effect — confirm and delete if it does not.
    dqn = atari_dqn.AtariDQN(frame_history, num_actions)
    agent = rmax_learner.RMaxLearner(env, env.abstraction, frame_history=frame_history)
    train(agent, env, test_epsilon, results_dir)
def train_double_dqn(env, num_actions, results_dir, game):
    """Train a Double DQN agent and hand off to the shared ``train`` loop.

    Parameters are forwarded as-is: ``results_dir`` is where outputs go and
    ``game`` is passed through to ``train`` (presumably the ROM/game name —
    confirm against the caller).
    """
    history_len = 4
    network = atari_dqn.AtariDQN(history_len, num_actions)
    learner = dq_learner.DQLearner(
        network, num_actions,
        frame_history=history_len,
        epsilon_end=0.01)  # training epsilon floor
    train(learner, env, 0.001, results_dir, game)  # 0.001 = test epsilon
def train_double_dqn(env, num_actions):
    """Fine-tune a DQN from a saved checkpoint, building the learner on GPU 0.

    NOTE(review): results go under ``./results/dqn/`` even though this is the
    double-DQN trainer — confirm that is intentional. Also reads the
    module-level ``game`` name.
    """
    results_dir = './results/dqn/%s_single_life_part_2' % game
    eps_train = 0.01
    eps_test = 0.001
    history_len = 4  # was 1 in an earlier configuration
    network = atari_dqn.AtariDQN(history_len, num_actions)
    with tf.device('/gpu:0'):
        learner = dq_learner.DQLearner(
            network, num_actions,
            beta=1.0,
            frame_history=history_len,
            epsilon_end=eps_train,
            restore_network_file='results/dqn/mr_single_life/mr_best_net')
    # NOTE(review): original single-line formatting is ambiguous about whether
    # train() ran inside the device scope; placed outside to match the sibling
    # trainers — confirm.
    train(learner, env, eps_test, results_dir)
def train_dqn(env, num_actions):
    """Train a vanilla (non-double) DQN with a CTS state encoder on Atari frames.

    NOTE(review): reads the module-level ``game`` name for the results path.
    """
    # Previously-used Toy MR settings, kept for reference:
    #   frame_history = 1; enc_func = toy_mr_encoder.encode_toy_mr_state
    #   cts_size = (11, 12, 7)
    results_dir = './results/dqn/' + game + '_vanilla'
    eps_train = 0.1
    eps_test = 0.05
    history_len = 4
    cts_dims = (42, 42, 8)  # CTS density-model input size — presumably (H, W, bins); confirm
    encoder = atari_encoder.encode_state
    network = atari_dqn.AtariDQN(history_len, num_actions, shared_bias=False)
    learner = dq_learner_pc.DQLearner(
        network, num_actions,
        target_copy_freq=10000,
        epsilon_end=eps_train,
        double=False,  # plain DQN target, not Double DQN
        frame_history=history_len,
        state_encoder=encoder,
        cts_size=cts_dims)
    train(learner, env, eps_test, results_dir)