# NOTE(review): the statements below share one physical line; their original
# line breaks and nesting cannot be recovered from this view, so the code is
# left untouched. Two units are present:
#
# 1) Tail of an agent-running routine (presumably `run_agent`; its header and
#    enclosing loops are not visible here). The agent's epsilon-greedy choice
#    for `state` is converted with `action_to_one_hot_bool`. A loop over
#    `range(frame_repeat)` then sleeps for `sleep_time`, calls
#    `game.make_action(action, 1)` and reads `game.is_episode_finished()`.
#    The returned reward is added to `reward_sum`; once `done` is true the
#    episode number and reward sum are printed, the sum is appended to
#    `total_reward`, and the loop is left with `break`. The routine returns
#    `(agent, total_reward)`.
#    NOTE(review): `next_state` is not assigned in the visible portion --
#    confirm it is set earlier in the routine.
#
# 2) `__main__` entry point. It builds the scenario path for
#    'deadly_corridor', creates the game with `initialize_vizdoom`, prints the
#    number of available buttons, and takes `state_size` from the shape of the
#    current state's screen buffer. A `DQN.DQN_Agent` or
#    `Double_DQN.DQN_Agent` is constructed with `checkpoint_file`, passed to
#    `run_agent`, and the game is closed afterwards.
#    NOTE(review): `agent` is only bound when `version` is 'DQN' or 'DDQN'.
#    NOTE(review): `episodes` is assigned but not passed to `run_agent` --
#    presumably read as a module-level global; confirm.
#    NOTE(review): `game.close()` is only reached if `run_agent` returns
#    normally.
action = action_to_one_hot_bool(agent.select_epsilon_greedy_action(state,epsilon)) for i in range(frame_repeat): sleep(sleep_time) reward = game.make_action(action, 1) done = game.is_episode_finished() state = next_state reward_sum += reward if done: print('episode:', episode, 'sum_of_rewards_for_episode:', reward_sum) total_reward.append(reward_sum) break return agent, total_reward if __name__ == "__main__": version = 'DDQN' config_file = 'deadly_corridor' checkpoint_file = './results/dqn/agent1/cp-0001.ckpt' frame_repeat = 12 episodes = 5 config_path = 'D:/Joe/Anaconda3/envs/tensorflow_env/Lib/site-packages/vizdoom/scenarios/'+config_file+'.cfg' game = initialize_vizdoom(config_path) action_size = game.get_available_buttons_size() print('action_size',action_size) state_size = np.array(game.get_state().screen_buffer.shape) if version=='DQN': agent = DQN.DQN_Agent(state_size,action_size,checkpoint_file=checkpoint_file) if version=='DDQN': agent = Double_DQN.DQN_Agent(state_size,action_size,checkpoint_file=checkpoint_file) _, total_reward = run_agent(agent, game, frame_repeat) game.close()
# Parameters.
# NOTE(review): `folder`, `m` and `episodes` are not referenced in this
# section -- presumably read elsewhere in the script (e.g. as module-level
# globals); kept unchanged. TODO confirm.
batch_size = 6
learning_rate = 0.001
folder = str(learning_rate)
replay_buffer_size = 100000
m = 256
episodes = 50
n_agents = 1

# Train `n_agents` agents for every (version, config_file) pair and save the
# `total_reward` values returned by `train_agent` for that pair to one .npy
# file. `versions`, `config_files` and `frame_repeat` are defined outside
# this section.
# NOTE(review): loop nesting was reconstructed from the data flow (the output
# file name depends on both `config_file` and `version`) -- confirm against
# the original layout.
for version in versions:
    # Fail fast on an unsupported name instead of hitting a NameError on
    # `agent` later, or silently retraining the agent left over from a
    # previous iteration.
    if version not in ('DQN', 'DDQN'):
        raise ValueError(
            "unknown agent version %r; expected 'DQN' or 'DDQN'" % (version,)
        )

    for config_file in config_files:
        config_path = (
            'D:/Joe/Anaconda3/envs/tensorflow_env/Lib/site-packages/vizdoom/scenarios/'
            + config_file
            + '.cfg'
        )
        all_total_rewards = []

        for n in range(n_agents):
            game = initialize_vizdoom(config_path)
            try:
                action_size = game.get_available_buttons_size()
                print('action_size', action_size)
                state_size = np.array(game.get_state().screen_buffer.shape)

                # Resolve the class lazily so only the module of the selected
                # algorithm is accessed.
                if version == 'DQN':
                    agent_class = DQN.DQN_Agent
                else:
                    agent_class = Double_DQN.DQN_Agent

                agent = agent_class(
                    state_size,
                    action_size,
                    batch_size=batch_size,
                    learning_rate=learning_rate,
                    replay_buffer_size=replay_buffer_size,
                    checkpoint_file='./agent' + str(n) + '/cp-9999.ckpt',
                )

                _, total_reward = train_agent(agent, game, frame_repeat)
            finally:
                # Close the game even when setup or training raises.
                game.close()

            all_total_rewards.append(total_reward)

        # Both supported versions write to the same file-name pattern, so a
        # single save replaces the two identical per-version branches.
        all_total_rewards_to_save = np.array(all_total_rewards)
        np.save(
            './all_total_rewards_' + config_file + '_' + version + '.npy',
            all_total_rewards_to_save,
        )