def load(noise_scale, seed):
    """Build the cartpole environment wrapped with reward noise.

    Args:
      noise_scale: Forwarded to `wrappers.RewardNoise` as `noise_scale`.
      seed: Seed handed to both `cartpole.Cartpole` and
        `wrappers.RewardNoise`.

    Returns:
      The `RewardNoise`-wrapped environment, with its `bsuite_num_episodes`
      attribute set to `sweep.NUM_EPISODES`.
    """
    base_env = cartpole.Cartpole(seed=seed)
    noisy_env = wrappers.RewardNoise(
        env=base_env,
        noise_scale=noise_scale,
        seed=seed,
    )
    # Record the episode budget on the environment object itself.
    noisy_env.bsuite_num_episodes = sweep.NUM_EPISODES
    return noisy_env
def load(noise_scale, seed):
    """Build the mountain car environment wrapped with reward noise.

    Args:
      noise_scale: Forwarded to `wrappers.RewardNoise` as `noise_scale`.
      seed: Seed handed to both `mountain_car.MountainCar` and
        `wrappers.RewardNoise`.

    Returns:
      The `RewardNoise`-wrapped environment, with its `bsuite_num_episodes`
      attribute set to `sweep.NUM_EPISODES`.
    """
    base_env = mountain_car.MountainCar(seed=seed)
    noisy_env = wrappers.RewardNoise(
        env=base_env,
        noise_scale=noise_scale,
        seed=seed,
    )
    # Record the episode budget on the environment object itself.
    noisy_env.bsuite_num_episodes = sweep.NUM_EPISODES
    return noisy_env
def load(noise_scale, seed):
    """Build the simple bandit environment wrapped with reward noise.

    Args:
      noise_scale: Forwarded to `wrappers.RewardNoise` as `noise_scale`.
      seed: Seed handed to both `bandit.SimpleBandit` and
        `wrappers.RewardNoise`.

    Returns:
      The `RewardNoise`-wrapped environment, with its `bsuite_num_episodes`
      attribute set to `sweep.NUM_EPISODES`.
    """
    base_env = bandit.SimpleBandit(seed=seed)
    noisy_env = wrappers.RewardNoise(
        env=base_env,
        noise_scale=noise_scale,
        seed=seed,
    )
    # Record the episode budget on the environment object itself.
    noisy_env.bsuite_num_episodes = sweep.NUM_EPISODES
    return noisy_env
def test_unwrap(self):
    """`raw_env` on the outermost wrapper is the innermost environment object."""
    raw_env = FakeEnvironment([dm_env.restart([])])
    # Stack three wrappers so that unwrapping has to pass through every layer.
    scale_env = wrappers.RewardScale(raw_env, reward_scale=1.)
    noise_env = wrappers.RewardNoise(scale_env, noise_scale=1.)
    logging_env = wrappers.Logging(noise_env, logger=None)  # pytype: disable=wrong-arg-types
    unwrapped = logging_env.raw_env
    # Object identity is the property under test, so assert it directly
    # instead of comparing id() values through assertEqual.
    self.assertIs(raw_env, unwrapped)
def load(mean_rewards=None, seed=None):
    """Build the simple bandit environment wrapped with `bernoulli=True` noise.

    Args:
      mean_rewards: Forwarded to `bandit.SimpleBandit` as `rewards`.
      seed: Seed handed to both `bandit.SimpleBandit` and
        `wrappers.RewardNoise`.

    Returns:
      The `RewardNoise`-wrapped environment, with its `bsuite_num_episodes`
      attribute set to `sweep.NUM_EPISODES`.
    """
    base_env = bandit.SimpleBandit(rewards=mean_rewards, seed=seed)
    noisy_env = wrappers.RewardNoise(
        env=base_env,
        bernoulli=True,
        seed=seed,
    )
    # Record the episode budget on the environment object itself.
    noisy_env.bsuite_num_episodes = sweep.NUM_EPISODES
    return noisy_env