import torch

# OpenAIGym, AC, TQL, DQN, RandomAgent, and experiment are assumed to come
# from the library under test; their exact import paths are not shown in
# this excerpt.


def test_pendulum(self):
    env = OpenAIGym(level='Pendulum', max_timesteps=100)
    hidden_size = 16

    # Actor: maps the state vector to a continuous action output.
    actor_fn = (lambda: torch.nn.Sequential(
        torch.nn.Linear(in_features=env.state_space['shape'][0], out_features=hidden_size),
        torch.nn.Tanh(),
        torch.nn.Linear(in_features=hidden_size, out_features=env.action_space['shape'][0])
    ))

    # Critic: maps the state vector to a scalar state-value estimate.
    critic_fn = (lambda: torch.nn.Sequential(
        torch.nn.Linear(in_features=env.state_space['shape'][0], out_features=hidden_size),
        torch.nn.Tanh(),
        torch.nn.Linear(in_features=hidden_size, out_features=1)
    ))

    agent = AC(
        state_space=env.state_space, action_space=env.action_space,
        actor_fn=actor_fn, actor_learning_rate=1e-3,
        critic_fn=critic_fn, critic_learning_rate=1e-3,
        discount=0.95, compute_advantage=True, normalize_returns=True
    )

    experiment.train(agent=agent, env=env, num_episodes=10)
    experiment.evaluate(agent=agent, env=env, num_episodes=10)

    agent.close()
    env.close()

def test_cartpole(self):
    env = OpenAIGym(level='CartPole', max_timesteps=100)

    agent = RandomAgent(state_space=env.state_space, action_space=env.action_space)

    experiment.train(agent=agent, env=env, num_episodes=10)
    experiment.evaluate(agent=agent, env=env, num_episodes=10)

    agent.close()
    env.close()

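# For context, a minimal sketch of what an experiment.train-style loop does.
# The agent and env methods used here (act, observe, reset, step) are
# hypothetical stand-ins: the experiment module's internals are not part of
# these tests, so this is an illustration, not the library's actual code.
def _train_loop_sketch(agent, env, num_episodes):
    for _ in range(num_episodes):
        state = env.reset()                                   # hypothetical API
        terminal = False
        while not terminal:
            action = agent.act(state)                         # hypothetical API
            state, reward, terminal = env.step(action)        # hypothetical API
            agent.observe(reward=reward, terminal=terminal)   # hypothetical API
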
def test_frozenlake(self):
    env = OpenAIGym(level='FrozenLake', max_timesteps=100)

    agent = TQL(state_space=env.state_space, action_space=env.action_space, learning_rate=0.3)

    experiment.train(agent=agent, env=env, num_episodes=10)
    experiment.evaluate(agent=agent, env=env, num_episodes=10)

    agent.close()
    env.close()

def test_taxi(self):
    env = OpenAIGym(level='Taxi', max_timesteps=100)

    agent = TQL(
        state_space=env.state_space, action_space=env.action_space,
        learning_rate=0.3, discount=0.95, exploration=0.1
    )

    experiment.train(agent=agent, env=env, num_episodes=10)
    experiment.evaluate(agent=agent, env=env, num_episodes=10)

    agent.close()
    env.close()

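# TQL's name and parameters (learning_rate, discount, exploration) suggest
# tabular Q-learning with epsilon-greedy exploration. For reference, the
# textbook tabular update is sketched below; this is the standard rule, not
# necessarily TQL's exact implementation.
def _q_update_sketch(Q, state, action, reward, next_state, terminal,
                     learning_rate=0.3, discount=0.95):
    # Q: 2-D array of action values, indexed as Q[state][action].
    target = reward if terminal else reward + discount * max(Q[next_state])
    Q[state][action] += learning_rate * (target - Q[state][action])
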
def test_frozenlake(self):
    env = OpenAIGym(level='FrozenLake', max_timesteps=100)
    num_states = env.state_space['num_values']
    hidden_size = 16

    # FrozenLake states are discrete indices, so an Embedding layer replaces
    # the Linear input layer used for vector-valued state spaces.
    network_fn = (lambda: torch.nn.Sequential(
        torch.nn.Embedding(num_embeddings=num_states, embedding_dim=hidden_size),
        torch.nn.Tanh(),
        torch.nn.Linear(in_features=hidden_size, out_features=env.action_space['num_values'])
    ))

    agent = DQN(
        state_space=env.state_space, action_space=env.action_space,
        network_fn=network_fn, learning_rate=1e-3
    )

    experiment.train(agent=agent, env=env, num_episodes=10)
    experiment.evaluate(agent=agent, env=env, num_episodes=10)

    agent.close()
    env.close()

def test_cartpole(self):
    env = OpenAIGym(level='CartPole', max_timesteps=100)
    hidden_size = 16

    # Q-network: state vector -> one Q-value per discrete action.
    network_fn = (lambda: torch.nn.Sequential(
        torch.nn.Linear(in_features=env.state_space['shape'][0], out_features=hidden_size),
        torch.nn.Tanh(),
        torch.nn.Linear(in_features=hidden_size, out_features=env.action_space['num_values'])
    ))

    agent = DQN(
        state_space=env.state_space, action_space=env.action_space,
        network_fn=network_fn, learning_rate=1e-3,
        target_network_update_frequency=10,
        memory=100, batch_size=16, update_frequency=4
    )

    experiment.train(agent=agent, env=env, num_episodes=10)
    experiment.evaluate(agent=agent, env=env, num_episodes=10)

    agent.close()
    env.close()

def test_taxi(self):
    env = OpenAIGym(level='Taxi', max_timesteps=100)
    num_states = env.state_space['num_values']
    hidden_size = 16

    network_fn = (lambda: torch.nn.Sequential(
        torch.nn.Embedding(num_embeddings=num_states, embedding_dim=hidden_size),
        torch.nn.Tanh(),
        torch.nn.Linear(in_features=hidden_size, out_features=env.action_space['num_values'])
    ))

    agent = DQN(
        state_space=env.state_space, action_space=env.action_space,
        network_fn=network_fn, learning_rate=1e-3, discount=0.95,
        exploration=0.1, target_network_update_frequency=10
    )

    experiment.train(agent=agent, env=env, num_episodes=10)
    experiment.evaluate(agent=agent, env=env, num_episodes=10)

    agent.close()
    env.close()

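# For reference, the standard DQN bootstrap target these agents presumably
# regress toward: r + discount * max_a' Q_target(s', a'), with the target
# network held fixed between syncs (target_network_update_frequency above).
# A hedged sketch, not this library's actual update code.
def _dqn_target_sketch(target_network, reward, next_state, terminal, discount=0.95):
    # reward, next_state, terminal are batched tensors.
    with torch.no_grad():
        next_q = target_network(next_state).max(dim=-1).values
    return reward + discount * (1.0 - terminal.float()) * next_q
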
def test_cartpole(self):
    env = OpenAIGym(level='CartPole', max_timesteps=100)
    hidden_size = 16

    # Actor: state vector -> one logit per discrete action.
    actor_fn = (lambda: torch.nn.Sequential(
        torch.nn.Linear(in_features=env.state_space['shape'][0], out_features=hidden_size),
        torch.nn.Tanh(),
        torch.nn.Linear(in_features=hidden_size, out_features=env.action_space['num_values'])
    ))

    # Critic: state vector -> scalar state-value estimate.
    critic_fn = (lambda: torch.nn.Sequential(
        torch.nn.Linear(in_features=env.state_space['shape'][0], out_features=hidden_size),
        torch.nn.Tanh(),
        torch.nn.Linear(in_features=hidden_size, out_features=1)
    ))

    agent = AC(
        state_space=env.state_space, action_space=env.action_space,
        actor_fn=actor_fn, actor_learning_rate=1e-3,
        critic_fn=critic_fn, critic_learning_rate=1e-3
    )

    experiment.train(agent=agent, env=env, num_episodes=10)
    experiment.evaluate(agent=agent, env=env, num_episodes=10)

    agent.close()
    env.close()

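# A hedged sketch of the advantage actor-critic losses these AC tests
# exercise: the critic regresses toward observed returns, and the actor is
# reinforced in proportion to the advantage (return minus critic baseline).
# These are the standard A2C-style losses, not necessarily this library's
# exact implementation.
def _ac_losses_sketch(log_probs, values, returns):
    # log_probs, values, returns: batched 1-D tensors over collected steps.
    advantage = returns - values.detach()        # baseline-subtracted return
    actor_loss = -(log_probs * advantage).mean()
    critic_loss = torch.nn.functional.mse_loss(values, returns)
    return actor_loss, critic_loss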