def create_er_from_episodes(
    discrete, max_len, num_steps, num_episodes, episode_length
):
    """Roll out an environment and return an experience replay buffer."""
    if discrete:
        env = GymEnvironment("NChain-v0")
        transformations = []
    else:
        env = GymEnvironment("Pendulum-v0")
        transformations = [
            MeanFunction(lambda state_, action_: state_),
            StateNormalizer(),
            ActionNormalizer(),
            RewardClipper(),
        ]

    memory = ExperienceReplay(
        max_len, transformations=transformations, num_steps=num_steps
    )

    for _ in range(num_episodes):
        state = env.reset()
        for _ in range(episode_length):
            action = env.action_space.sample()  # Sample a random action.
            observation, state, done, info = step_env(
                env, state, action, action_scale=1.0
            )
            memory.append(observation)
        memory.end_episode()

    return memory
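# A minimal sketch of calling the helper above, e.g. from a test; the argument
# values are illustrative, and `len(memory)` assumes rllib's ExperienceReplay
# implements `__len__`.
memory = create_er_from_episodes(
    discrete=True, max_len=1000, num_steps=0, num_episodes=2, episode_length=100
)
print(len(memory))  # Number of transitions currently stored in the buffer.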
def rollout_agent(environment, base_agent, extender, num_steps, td_k=True):
    """Train and evaluate an extended agent on an environment."""
    torch.manual_seed(SEED)
    np.random.seed(SEED)

    environment = GymEnvironment(environment, SEED)
    agent = extender.default(
        environment,
        base_agent_name=base_agent,
        num_steps=num_steps,
        num_samples=2,
        num_iter=2,
        num_epochs=2,
        td_k=td_k,
    )
    train_agent(
        agent,
        environment,
        num_episodes=NUM_EPISODES,
        max_steps=MAX_STEPS,
        plot_flag=False,
    )
    evaluate_agent(
        agent, environment, num_episodes=NUM_EPISODES, max_steps=MAX_STEPS, render=False
    )
    agent.logger.delete_directory()  # Clean up the logging directory.
def init_experiment(args, **kwargs):
    """Initialize experiment."""
    arg_dict = vars(args)
    arg_dict.update(kwargs)
    arg_dict.update(parse_config_file(args.agent_config))
    arg_dict = {k: v for k, v in arg_dict.items() if v is not None}

    env_config = parse_config_file(args.env_config)
    args.max_steps = env_config.get("max_steps", 1000)

    # %% Set random seeds.
    set_random_seed(args.seed)

    # %% Initialize environment.
    if env_config["name"] in gym_envs:
        environment = GymEnvironment(env_config["name"], seed=args.seed)
    else:
        env_name, env_task = env_config["name"].split("/")
        environment = DMSuiteEnvironment(env_name, env_task, seed=args.seed)

    # %% Initialize agent.
    agent_module = importlib.import_module("rllib.agent")
    agent = getattr(agent_module, f"{args.agent}Agent").default(
        environment,
        reward_transformer=RewardTransformer(scale=arg_dict.get("reward_scale", 1.0)),
        **arg_dict,
    )
    agent.logger.save_hparams(arg_dict)

    return agent, environment
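# A hypothetical driver for init_experiment, sketching the argparse flags the
# function reads (agent, agent_config, env_config, seed); the flag names mirror
# the attribute accesses above and the defaults are placeholders.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--agent", type=str, default="SAC")
    parser.add_argument("--agent-config", dest="agent_config", type=str, required=True)
    parser.add_argument("--env-config", dest="env_config", type=str, required=True)
    parser.add_argument("--seed", type=int, default=0)
    agent, environment = init_experiment(parser.parse_args())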
def test_policies(environment, policy, batch_size):
    environment = GymEnvironment(environment, SEED)

    critic = NNQFunction(
        dim_state=environment.dim_observation,
        dim_action=environment.dim_action,
        num_states=environment.num_states,
        num_actions=environment.num_actions,
        layers=LAYERS,
        tau=TARGET_UPDATE_TAU,
    )

    policy = policy(critic, 0.1)

    optimizer = torch.optim.Adam(critic.parameters(), lr=LEARNING_RATE)
    # Note: the loss class itself (not an instance) is passed; the agent is
    # expected to instantiate it.
    criterion = torch.nn.MSELoss

    agent = SARSAAgent(
        critic=critic,
        policy=policy,
        criterion=criterion,
        optimizer=optimizer,
        batch_size=batch_size,
        target_update_frequency=TARGET_UPDATE_FREQUENCY,
        gamma=GAMMA,
    )

    train_agent(
        agent,
        environment,
        num_episodes=NUM_EPISODES,
        max_steps=MAX_STEPS,
        plot_flag=False,
    )
    evaluate_agent(agent, environment, 1, MAX_STEPS, render=False)
    agent.logger.delete_directory()  # Clean up the logging directory.
def get_env(self, gym_env, **kwargs):
    """Get environment."""
    if gym_env:
        env = GymEnvironment(f"{self.name}-v0", **kwargs)
    else:
        module = importlib.import_module("rllib.environment.mdps")
        env = getattr(module, self.name)(**kwargs)
    return env
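# A sketch of the intended call site for get_env: `self` is assumed to be an
# enum-like benchmark entry whose `name` is either a registered gym id (which
# gets a "-v0" suffix) or a class name in rllib.environment.mdps. The member
# `Benchmark.Pendulum` below is hypothetical, for illustration only:
#
#     env = Benchmark.Pendulum.get_env(gym_env=True, seed=0)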
def main(args):
    """Run experiment."""
    set_random_seed(args.seed)
    env_config = parse_config_file(args.env_config_file)

    environment = GymEnvironment(
        env_config["name"], ctrl_cost_weight=env_config["action_cost"], seed=args.seed
    )
    reward_model = environment.env.reward_model()

    if args.exploration == "optimistic":
        dynamical_model = HallucinatedModel.default(environment, beta=args.beta)
        environment.add_wrapper(HallucinationWrapper)
    else:
        dynamical_model = TransformedModel.default(environment)

    kwargs = parse_config_file(args.agent_config_file)

    agent = getattr(
        importlib.import_module("rllib.agent"), f"{args.agent}Agent"
    ).default(
        environment=environment,
        dynamical_model=dynamical_model,
        reward_model=reward_model,
        thompson_sampling=args.exploration == "thompson",
        **kwargs,
    )

    train_agent(
        agent=agent,
        environment=environment,
        max_steps=env_config["max_steps"],
        num_episodes=args.train_episodes,
        render=args.render,
        print_frequency=1,
    )

    evaluate_agent(
        agent=agent,
        environment=environment,
        max_steps=env_config["max_steps"],
        num_episodes=args.test_episodes,
    )
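# A hypothetical argparse entry point matching the attributes main() reads
# (env_config_file, agent_config_file, agent, exploration, beta, seed,
# train_episodes, test_episodes, render); flag names and defaults are
# assumptions, not the original script's.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--env-config-file", dest="env_config_file", required=True)
    parser.add_argument("--agent-config-file", dest="agent_config_file", required=True)
    parser.add_argument("--agent", default="MPC")
    parser.add_argument(
        "--exploration",
        choices=["optimistic", "thompson", "expected"],
        default="expected",
    )
    parser.add_argument("--beta", type=float, default=1.0)
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--train-episodes", dest="train_episodes", type=int, default=20)
    parser.add_argument("--test-episodes", dest="test_episodes", type=int, default=5)
    parser.add_argument("--render", action="store_true")
    main(parser.parse_args())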
def rollout_agent(environment, agent):
    torch.manual_seed(SEED)
    np.random.seed(SEED)

    environment = GymEnvironment(environment, SEED)
    agent = agent.default(environment, num_iter=2, num_epochs=2)
    train_agent(
        agent,
        environment,
        num_episodes=NUM_EPISODES,
        max_steps=MAX_STEPS,
        plot_flag=False,
    )
    evaluate_agent(agent, environment, 1, MAX_STEPS, render=False)
    agent.logger.delete_directory()  # Clean up the logging directory.
def test_reward(environment, action_cost, action_type):
    env_name, reward_model_ = environment
    if action_cost is not None:
        env = GymEnvironment(env_name, action_cost=action_cost)
    else:
        env = GymEnvironment(env_name)
    state = env.reset()

    if action_cost is not None:
        reward_model = reward_model_(action_cost=action_cost)
    else:
        reward_model = reward_model_()
    reward_model.set_goal(env.goal)

    for _ in range(50):
        if action_type == "random":
            action = env.action_space.sample()
        elif action_type == "zero":
            action = np.zeros(env.dim_action)
        else:
            raise NotImplementedError
        next_state, reward, done, info = env.step(action)
        if env.goal is not None:
            state = np.concatenate((state, env.goal))

        # Check the reward model against the environment reward (numpy inputs).
        np.testing.assert_allclose(
            reward, reward_model(state, action, next_state)[0], rtol=1e-3, atol=1e-6
        )
        # Check batched numpy inputs.
        np.testing.assert_allclose(
            np.tile(reward, (5,)),
            reward_model(
                np.tile(state, (5, 1)),
                np.tile(action, (5, 1)),
                np.tile(next_state, (5, 1)),
            )[0],
            rtol=1e-3,
            atol=1e-6,
        )

        state = torch.tensor(state, dtype=torch.get_default_dtype())
        action = torch.tensor(action, dtype=torch.get_default_dtype())
        next_state = torch.tensor(next_state, dtype=torch.get_default_dtype())
        # Check torch inputs, single and batched.
        np.testing.assert_allclose(
            reward, reward_model(state, action, next_state)[0], rtol=1e-3, atol=1e-6
        )
        np.testing.assert_allclose(
            np.tile(reward, (5, 1)),
            reward_model(
                state.repeat(5, 1), action.repeat(5, 1), next_state.repeat(5, 1)
            )[0],
            rtol=1e-3,
            atol=1e-6,
        )
        state = next_state.numpy()
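# test_reward receives its arguments through pytest parametrization; a sketch
# of the assumed decoration, reusing the (env, reward model) pair from the
# sparse-reward test below as a placeholder value:
#
#     @pytest.mark.parametrize("environment", [("MBRLReacher3D-v0", ReacherReward)])
#     @pytest.mark.parametrize("action_cost", [None, 0.1])
#     @pytest.mark.parametrize("action_type", ["random", "zero"])
#     def test_reward(environment, action_cost, action_type):
#         ...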
def test_set_state_np(self):
    env = GymEnvironment("VPendulum-v0")
    env.reset()
    state, action = self.state_action
    action = np.round(action)
    env.state = state
    obs, _, _, _ = env.step(action)
    state = env.state
    np.testing.assert_allclose(obs, state)
def test_tolerance(action_cost):
    env_name, reward_model_ = ("MBRLReacher3D-v0", ReacherReward)
    if action_cost is not None:
        env = GymEnvironment(env_name, action_cost=action_cost, sparse=True)
    else:
        env = GymEnvironment(env_name, sparse=True)
    state = env.reset()

    if action_cost is not None:
        reward_model = reward_model_(action_cost=action_cost, sparse=True)
    else:
        reward_model = reward_model_(sparse=True)
    reward_model.set_goal(env.goal)

    for _ in range(50):
        action = env.action_space.sample()
        next_state, reward, done, info = env.step(action)
        if env.goal is not None:
            state = np.concatenate((state, env.goal))

        # Check the sparse reward model against the environment reward (numpy inputs).
        np.testing.assert_allclose(
            reward, reward_model(state, action, next_state)[0], rtol=1e-3, atol=1e-6
        )
        # Check batched numpy inputs.
        np.testing.assert_allclose(
            np.tile(reward, (5,)),
            reward_model(
                np.tile(state, (5, 1)),
                np.tile(action, (5, 1)),
                np.tile(next_state, (5, 1)),
            )[0],
            rtol=1e-3,
            atol=1e-6,
        )

        state = torch.tensor(state, dtype=torch.get_default_dtype())
        action = torch.tensor(action, dtype=torch.get_default_dtype())
        next_state = torch.tensor(next_state, dtype=torch.get_default_dtype())
        # Check torch inputs, single and batched.
        np.testing.assert_allclose(
            reward, reward_model(state, action, next_state)[0], rtol=1e-3, atol=1e-6
        )
        np.testing.assert_allclose(
            np.tile(reward, (5, 1)),
            reward_model(
                state.repeat(5, 1), action.repeat(5, 1), next_state.repeat(5, 1)
            )[0],
            rtol=1e-3,
            atol=1e-6,
        )
        state = next_state.numpy()
"""Python Script Template.""" from rllib.environment import GymEnvironment from rllib.util.utilities import set_random_seed from qreps.environment.random_action_wrapper import RandomActionWrapper import os from exps.utilities import parse_arguments, run_experiment from exps.environments.utilities import get_saddle_agents, get_benchmark_agents args = parse_arguments() args.env_name = "RiverSwim-v0" args.lr = 0.01 args.eta = 5.0 set_random_seed(args.seed) env = GymEnvironment(args.env_name, seed=args.seed) env.add_wrapper(RandomActionWrapper, p=args.random_action_p) agents = get_saddle_agents(env, **vars(args)) agents.update(get_benchmark_agents(env, **vars(args))) df = run_experiment(agents, env, args) df.to_pickle(f"river_swim_results_{args.seed}.pkl") os.system("python river_swim_plot.py")
        transitions[(state, action)].append(
            {"next_state": 0, "reward": reward, "probability": 1}
        )  # Noisy transitions.
    return transitions


if __name__ == "__main__":
    from rllib.environment import GymEnvironment
    from rllib.environment.utilities import transitions2kernelreward

    import qreps  # noqa: F401

    env = GymEnvironment("DeepSea-v0", side=5)
    kernel, reward = transitions2kernelreward(
        env.env.transitions, env.num_states, env.num_actions
    )
    state = env.reset()
    print(state)
    for i in range(10):
        action = env.action_space.sample()
        next_state, r, done, f = env.step(action)
        print(
            env.env._state_to_grid(state),
            env.env._state_to_grid(next_state),
            action,
            r,
            done,
        )
        state = next_state
def discrete_environment(request):
    return GymEnvironment(request.param)
def environment(request):
    return GymEnvironment(request.param)
def continuous_action_environment(request):
    return GymEnvironment(request.param)
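# The three helpers above rely on pytest's `request.param`, which is only
# populated when the function is registered as a parametrized fixture. A
# minimal sketch of the assumed declaration (the environment ids are
# placeholders, not the original parameter lists):
#
#     import pytest
#
#     @pytest.fixture(params=["NChain-v0", "CartPole-v0"])
#     def discrete_environment(request):
#         return GymEnvironment(request.param)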
import numpy as np
import torch.optim

from rllib.agent import DPGAgent, TD3Agent  # noqa: F401
from rllib.dataset import ExperienceReplay, PrioritizedExperienceReplay  # noqa: F401
from rllib.environment import GymEnvironment
from rllib.util.parameter_decay import ExponentialDecay
from rllib.util.training.agent_training import evaluate_agent, train_agent

ENVIRONMENT = ["MountainCarContinuous-v0", "Pendulum-v0"][0]
NUM_EPISODES = 25
MAX_STEPS = 2500
GAMMA = 0.99
EPS_START = 1.0
EPS_END = 0.1
EPS_DECAY = 1e6
SEED = 0

torch.manual_seed(SEED)
np.random.seed(SEED)

environment = GymEnvironment(ENVIRONMENT, SEED)
noise = ExponentialDecay(EPS_START, EPS_END, EPS_DECAY)
agent = DPGAgent.default(environment, exploration_noise=noise, gamma=GAMMA)

train_agent(
    agent, environment, num_episodes=NUM_EPISODES, max_steps=MAX_STEPS, render=True
)
evaluate_agent(agent, environment, 1, MAX_STEPS)
"probability": 0.5, "reward": reward }) for j in range(8): for a in range(2): transitions[(3 + j, a)].append({ "next_state": 0, "probability": 1.0, "reward": 0 }) return transitions if __name__ == "__main__": from rllib.environment import GymEnvironment from rllib.environment.utilities import transitions2kernelreward import qreps # noqa: F401 env = GymEnvironment("WideTree-v0", reward=1) kernel, reward = transitions2kernelreward(env.env.transitions, env.num_states, env.num_actions) print(kernel, reward) state = env.reset() print(state) for i in range(10): action = env.action_space.sample() next_state, r, done, f = env.step(action) print(state, action, next_state, r, done) state = next_state
"next_state": length, "probability": 0.9, "reward": 1 }) transitions[(length, 0)].append({ "next_state": length - 1, "probability": 0.1, "reward": 0 }) return transitions if __name__ == "__main__": from rllib.environment import GymEnvironment from rllib.environment.utilities import transitions2kernelreward import qreps # noqa: F401 env = GymEnvironment("RiverSwim-v0", length=5) kernel, reward = transitions2kernelreward(env.env.transitions, env.num_states, env.num_actions) print(kernel, reward) state = env.reset() print(state) for i in range(100): action = env.action_space.sample() next_state, r, done, f = env.step(action) print(state, action, next_state, r, done) state = next_state