# NOTE(review): this print closes out code that begins before the visible
# chunk; `total_epochs` and `episodes` are expected to be bound there.
# The `f` prefix is required: without it the braces are printed literally
# instead of the computed average.
print(f"Average time steps per episode: {total_epochs / episodes}")

# Create the LunarLander environment.
env_name = "LunarLander-v2"
env = gym.make(env_name)

# The RandomDQNAgent is only constructed here; its training run is currently
# disabled (random_agent.train()).
random_agent = RandomDQNAgent(
    env_name,
    env,
    1000,
    is_state_box=True,
    memory_capacity=100000,
)

agent = DQNAgent(
    env_name,
    env,
    5000,
    learning_rate=0.00025,
    start_epsilon=1.0,
    discount_factor=0.99,
    decay_rate=0.0001,
    make_checkpoint=True,
    is_state_box=True,
    batch_size=64,
    memory_capacity=100000,
)
# Training from scratch is disabled. To re-enable it, optionally reuse the
# random agent's memory first (agent.memory = random_agent.memory) and then
# call agent.train().

# Restore a saved checkpoint; `agent.load` yields the network weights plus the
# per-episode rewards and episode lengths recorded with them.
weights, rewards, episode_len = agent.load(
    "/home/dsalwala/NUIG/Thesis/rl-algos/data/LunarLander-v2_100.npy"
)
stats = plotting.EpisodeStats(
    episode_lengths=episode_len,
    episode_rewards=rewards,
)

# Alternative when the agent was trained in this process:
#   nn, stats = agent.nn.get_weights(), agent.stats
# NOTE(review): 8 and 4 are presumably the observation and action sizes, and
# 0.00025 the learning rate -- confirm against ANN's signature.
nn = ANN(8, 4, 0.00025)
nn.set_weights(weights)

# Close the environment even if the playback raises.
try:
    play_episode(env, nn, 1)
finally:
    env.close()

plotting.plot_episode_stats(stats)
# Build the DQN agent. `env_name` and `env` are not assigned in this block and
# must already be bound by earlier code.
# NOTE(review): the checkpoint loaded below is named for CartPole-v0 -- confirm
# that `env` is the matching environment at this point.
agent = DQNAgent(
    env_name,
    env,
    5000,
    learning_rate=0.00025,
    start_epsilon=1.0,
    discount_factor=0.99,
    decay_rate=0.0001,
    make_checkpoint=True,
    is_state_box=True,
    batch_size=64,
    memory_capacity=100000,
)
# Training is disabled in this script. To train instead of loading, optionally
# set agent.memory = random_agent.memory and call agent.train().

# Load the saved checkpoint and split it into its three parts.
checkpoint = agent.load(
    "/home/dsalwala/NUIG/Thesis/rl-algos/data/CartPole-v0_3000.npy"
)
weights, rewards, episode_len = checkpoint

stats = plotting.EpisodeStats(
    episode_lengths=episode_len,
    episode_rewards=rewards,
)

# Alternative when the agent was trained in this process:
#   nn, stats = agent.nn, agent.stats
# NOTE(review): 4 and 2 are presumably the observation and action sizes --
# confirm against ANN's signature.
nn = ANN(4, 2, 0.00025)
nn.set_weights(weights)

# Play one episode with the restored network, then release the environment.
play_episode(env, nn, 1)
env.close()

plotting.plot_episode_stats(stats)