def test_env_seeding(env_name):
    seeding.set_global_seed(123)
    env1 = gym_make(env_name)

    seeding.set_global_seed(456)
    env2 = gym_make(env_name)

    seeding.set_global_seed(123)
    env3 = gym_make(env_name)

    if deepcopy(env1).is_online():
        traj1 = get_env_trajectory(env1, 500)
        traj2 = get_env_trajectory(env2, 500)
        traj3 = get_env_trajectory(env3, 500)
        # env1 and env2 use different seeds: trajectories must differ
        assert not compare_trajectories(traj1, traj2)
        # env1 and env3 use the same seed: trajectories must match
        assert compare_trajectories(traj1, traj3)

def test_env_seeding(env_name):
    seeder1 = Seeder(123)
    env1 = gym_make(env_name)
    env1.reseed(seeder1)

    seeder2 = Seeder(456)
    env2 = gym_make(env_name)
    env2.reseed(seeder2)

    seeder3 = Seeder(123)
    env3 = gym_make(env_name)
    env3.reseed(seeder3)

    if deepcopy(env1).is_online():
        traj1 = get_env_trajectory(env1, 500)
        traj2 = get_env_trajectory(env2, 500)
        traj3 = get_env_trajectory(env3, 500)
        # different seeds (123 vs 456): trajectories must differ
        assert not compare_trajectories(traj1, traj2)
        # same seed (123): trajectories must match
        assert compare_trajectories(traj1, traj3)

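# The two tests above rely on helpers, get_env_trajectory and
# compare_trajectories, whose implementations are not shown here.
# A minimal sketch of what they could look like (hypothetical, for
# illustration only), assuming the environment follows the standard
# gym reset/step interface and actions are drawn from env.action_space:
import numpy as np


def get_env_trajectory(env, horizon):
    """Roll out the environment for `horizon` steps and record observations."""
    states = []
    state = env.reset()
    for _ in range(horizon):
        states.append(state)
        state, reward, done, _ = env.step(env.action_space.sample())
        if done:
            state = env.reset()
    return states


def compare_trajectories(traj1, traj2):
    """Return True if the two trajectories are element-wise identical."""
    if len(traj1) != len(traj2):
        return False
    return all(np.array_equal(s1, s2) for s1, s2 in zip(traj1, traj2))
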
def test_dqn_agent(use_double_dqn, use_prioritized_replay):
    env = gym_make("CartPole-v0")
    agent = DQNAgent(
        env,
        learning_starts=5,
        eval_interval=75,
        train_interval=2,
        gradient_steps=-1,
        use_double_dqn=use_double_dqn,
        use_prioritized_replay=use_prioritized_replay,
    )
    agent.fit(budget=500)

def test_dqn_agent():
    env = gym_make("CartPole-v0")
    params = {"n_episodes": 10}
    agent = DQNAgent(env, **params)
    agent.fit()
    agent.policy(env.observation_space.sample())

from rlberry.envs import gym_make
from rlberry.agents import RSUCBVIAgent
from rlberry.utils.logging import configure_logging
from rlberry.wrappers import RescaleRewardWrapper

configure_logging("DEBUG")

env = gym_make('Acrobot-v1')
env.reward_range = (-1.0, 0.0)  # missing in gym implementation

# rescale rewards to [0, 1]
env = RescaleRewardWrapper(env, (0.0, 1.0))

agent = RSUCBVIAgent(env, n_episodes=10, gamma=0.99, horizon=200,
                     bonus_scale_factor=0.1, min_dist=0.2)
agent.fit()

state = env.reset()
for tt in range(200):
    action = agent.policy(state)
    next_state, reward, done, _ = env.step(action)
    state = next_state
    env.render()
env.close()

# Methods of a wrapper class that delegates to a Stable-Baselines algorithm
# stored in self.wrapped.
def fit(self, **kwargs):
    result = self.wrapped.learn(**kwargs)
    info = {}  # possibly store something from result
    return info

def policy(self, observation, **kwargs):
    action, _state = self.wrapped.predict(observation, **kwargs)
    return action

#
# Training one agent
#
env = gym_make('CartPole-v1')
agent = A2CAgent(env, 'MlpPolicy', verbose=1)
agent.fit(total_timesteps=1000)

obs = env.reset()
for i in range(1000):
    action = agent.policy(obs, deterministic=True)
    obs, reward, done, info = env.step(action)
    env.render()
    if done:
        break
env.close()

#
# Training several agents and comparing different hyperparameters
#
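# A possible continuation of the header above (hypothetical, for illustration
# only): it assumes the A2CAgent wrapper forwards extra keyword arguments such
# as learning_rate to the underlying Stable-Baselines A2C constructor.
env = gym_make('CartPole-v1')

learning_rates = [1e-3, 7e-4, 1e-4]
trained_agents = {}
for lr in learning_rates:
    agent = A2CAgent(env, 'MlpPolicy', learning_rate=lr, verbose=0)
    agent.fit(total_timesteps=1000)
    trained_agents[lr] = agent

# Compare the agents with one greedy evaluation episode each.
for lr, agent in trained_agents.items():
    obs = env.reset()
    total_reward = 0.0
    done = False
    while not done:
        action = agent.policy(obs, deterministic=True)
        obs, reward, done, info = env.step(action)
        total_reward += reward
    print(f"learning_rate={lr}: episode reward = {total_reward}")

env.close()
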
from pathlib import Path

from torch.utils.tensorboard import SummaryWriter

from rlberry.envs import gym_make
from rlberry.agents.dqn import DQNAgent
from rlberry.utils.logging import configure_logging

configure_logging(level="DEBUG")

env = gym_make("CartPole-v0")
agent = DQNAgent(env, n_episodes=50, exploration_kwargs={"tau": 1000})
agent.set_writer(SummaryWriter())

print(f"Running DQN on {env}")
print(f"Visualize with tensorboard by running:\n"
      f"$tensorboard --logdir {Path(agent.writer.log_dir).parent}")

agent.fit()

for episode in range(3):
    done = False
    state = env.reset()
    while not done:
        action = agent.policy(state)
        state, reward, done, _ = env.step(action)
        env.render()
env.close()

from rlberry.seeding import Seeder

seeder = Seeder(123)

# Each Seeder instance has a random number generator (rng).
# See https://numpy.org/doc/stable/reference/random/generator.html to check
# the methods available in rng.
seeder.rng.integers(5)
seeder.rng.normal()
print(type(seeder.rng))
# etc.

# Environments and agents should be seeded using a single seeder,
# to ensure that their random number generators are independent.
from rlberry.envs import gym_make
from rlberry.agents import RSUCBVIAgent

env = gym_make("MountainCar-v0")
env.reseed(seeder)

agent = RSUCBVIAgent(env)
agent.reseed(seeder)

# Environments and agents have their own seeder and rng.
# When writing your own agents and inheriting from the Agent class,
# you should use agent.rng whenever you need to generate random numbers;
# the same applies to your environments.
# This is necessary to ensure reproducibility.
print("env seeder: ", env.seeder)
print("random sample from env rng: ", env.rng.normal())
print("agent seeder: ", agent.seeder)
print("random sample from agent rng: ", agent.rng.normal())
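
# Sanity check of the behaviour described above (a minimal sketch, assuming
# Seeder is importable from rlberry.seeding): two seeders built from the same
# seed produce identical random streams, which is the property the
# environment-seeding tests rely on.
from rlberry.seeding import Seeder

seeder_a = Seeder(123)
seeder_b = Seeder(123)
samples_a = [int(seeder_a.rng.integers(100)) for _ in range(5)]
samples_b = [int(seeder_b.rng.integers(100)) for _ in range(5)]
assert samples_a == samples_b
print("identical samples:", samples_a)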