def train_dqn(env, num_actions):
    """Configure and launch vanilla (non-double) DQN training on the coin game.

    Builds an AtariDQN network with a 4-frame history, wraps it in a
    DQLearner configured for single-network Q-learning (double=False),
    and hands everything to the module-level ``train`` loop.
    """
    results_dir = './results/dqn/coin_game'
    training_epsilon = 0.1   # exploration floor reached at the end of annealing
    test_epsilon = 0.05      # exploration used during evaluation episodes
    frame_history = 4

    dqn = atari_dqn.AtariDQN(frame_history, num_actions, shared_bias=False)
    agent = dq_learner.DQLearner(
        dqn,
        num_actions,
        target_copy_freq=10000,
        epsilon_end=training_epsilon,
        double=False,
        frame_history=frame_history,
    )
    train(agent, env, test_epsilon, results_dir)
def train_double_dqn(env, num_actions):
    """Configure and launch double-DQN training (DQLearner defaults, 1-frame history).

    NOTE(review): ``game`` is a free variable here, not a parameter — it is
    presumably a module-level global defined elsewhere in this file; confirm
    it exists before calling. Also note this function is redefined later in
    the file under the same name, which shadows this definition at import time.
    """
    results_dir = './results/dqn/%s' % game
    training_epsilon = 0.01
    test_epsilon = 0.001
    frame_history = 1

    dqn = atari_dqn.AtariDQN(frame_history, num_actions)
    agent = dq_learner.DQLearner(
        dqn,
        num_actions,
        frame_history=frame_history,
        epsilon_end=training_epsilon,
    )
    train(agent, env, test_epsilon, results_dir)
def train_tabular_dqn(env, num_actions):
    """Configure and launch DQN training with a tabular network on the coin game.

    Uses TabularDQN (size parameter n=3) with heavily customized learner
    hyperparameters — learning rate, replay sizes, and epsilon schedule —
    which are also encoded in the results directory name for bookkeeping.
    """
    results_dir = './results/dqn/tab_coin_game_lr0.0025_rp10000'
    training_epsilon = 0.1
    test_epsilon = 0.05
    n = 3                # size argument passed to TabularDQN — semantics defined in tabular_dqn
    frame_history = 1

    dqn = tabular_dqn.TabularDQN(n, frame_history, num_actions, shared_bias=False)
    agent = dq_learner.DQLearner(
        dqn,
        num_actions,
        target_copy_freq=3000,
        epsilon_end=training_epsilon,
        double=False,
        frame_history=frame_history,
        learning_rate=0.0025,
        replay_start_size=10000,
        epsilon_steps=100000.,
        # Replay buffer barely larger than replay_start_size (10001 vs 10000).
        replay_memory_size=10001,
    )
    train(agent, env, test_epsilon, results_dir)
def train_double_dqn(env, num_actions):
    """Configure and launch double-DQN training, resuming from a saved network.

    Restores weights from ``results/dqn/mr_single_life/mr_best_net`` and pins
    learner construction to GPU 0 via ``tf.device``.

    NOTE(review): ``game`` is a free variable, not a parameter — presumably a
    module-level global defined elsewhere; confirm. This definition also
    shadows an earlier function of the same name in this file.
    """
    results_dir = './results/dqn/%s_single_life_part_2' % game
    training_epsilon = 0.01
    test_epsilon = 0.001
    # frame_history = 1
    frame_history = 4

    dqn = atari_dqn.AtariDQN(frame_history, num_actions)
    with tf.device('/gpu:0'):
        agent = dq_learner.DQLearner(
            dqn,
            num_actions,
            beta=1.0,
            frame_history=frame_history,
            epsilon_end=training_epsilon,
            restore_network_file='results/dqn/mr_single_life/mr_best_net',
        )
    # NOTE(review): placement of train() relative to the tf.device scope was
    # ambiguous in the source formatting; placed at function level to match
    # the sibling launcher functions — confirm against original history.
    train(agent, env, test_epsilon, results_dir)
def train_double_dqrn(env, num_actions, results_dir, game):
    """Configure and launch DQN training with a recurrent network (AtariDQRN).

    Unlike the sibling launchers, both the output directory and the game name
    are taken as parameters and forwarded to ``train``. Despite the name,
    the learner is configured with double=False (single-network targets).
    """
    training_epsilon = 0.01
    test_epsilon = 0.001
    frame_history = 4

    dqn = atari_dqrn.AtariDQRN(
        frame_history,
        num_actions,
        shared_bias=False,
        append_conv_output=False,
    )
    agent = dq_learner.DQLearner(
        dqn,
        num_actions,
        target_copy_freq=10000,
        epsilon_end=training_epsilon,
        double=False,
        frame_history=frame_history,
    )
    train(agent, env, test_epsilon, results_dir, game)