def test_pickleable(self):
    inner_env = CartpoleEnv(obs_noise=5.)
    env = OcclusionEnv(inner_env, [1])
    round_trip = pickle.loads(pickle.dumps(env))
    assert round_trip
    obs = inner_env.reset()
    assert round_trip.occlude(obs) == env.occlude(obs)
    assert round_trip.env.obs_noise == env.env.obs_noise
    step_env(round_trip)
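# Note: several of the pickling tests in this collection call a step_env()
# helper that is not defined in this excerpt. A minimal sketch of such a
# helper (name, signature, and behavior assumed here, not taken from the
# source) could look like this:
def step_env(env, n=10):
    """Roll the environment forward a few steps with random actions."""
    env.reset()
    for _ in range(n):
        _, _, done, _ = env.step(env.action_space.sample())
        if done:
            break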
def run_task(*_):
    env = TheanoEnv(normalize(CartpoleEnv()))

    policy = DeterministicMLPPolicy(
        env_spec=env.spec,
        # The neural network policy should have two hidden layers,
        # each with 32 hidden units.
        hidden_sizes=(32, 32))

    es = OUStrategy(env_spec=env.spec)

    qf = ContinuousMLPQFunction(env_spec=env.spec)

    algo = DDPG(
        env=env,
        policy=policy,
        es=es,
        qf=qf,
        batch_size=32,
        max_path_length=100,
        epoch_length=1000,
        min_pool_size=10000,
        n_epochs=1000,
        discount=0.99,
        scale_reward=0.01,
        qf_learning_rate=1e-3,
        policy_learning_rate=1e-4,
        # plot=True enables live plotting; the experiment launcher must also
        # be started with plot=True.
        plot=True,
    )
    algo.train()
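# The run_task(*_) functions in these examples are meant to be handed to an
# experiment launcher rather than called directly. A hedged sketch, assuming
# garage's run_experiment launcher and its usual keyword arguments:
from garage.misc.instrument import run_experiment

run_experiment(
    run_task,
    # Number of parallel sampling workers.
    n_parallel=1,
    # Keep only the snapshot from the last iteration.
    snapshot_mode="last",
    seed=1,
    # Pair this with plot=True inside run_task to enable live plotting.
    plot=True,
)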
def run_task(*_):
    env = TfEnv(normalize(CartpoleEnv()))

    policy = GaussianMLPPolicy(
        env_spec=env.spec,
        hidden_sizes=(32, 32),
        hidden_nonlinearity=tf.nn.tanh,
        output_nonlinearity=None,
    )

    baseline = GaussianMLPBaseline(
        env_spec=env.spec,
        regressor_args=dict(hidden_sizes=(32, 32)),
    )

    algo = InstrumentedTRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=1024,
        max_path_length=100,
        n_itr=4,
        discount=0.99,
        gae_lambda=0.98,
        policy_ent_coeff=0.0,
        plot=True,
    )
    algo.train()
def test_does_not_modify_action(self):
    inner_env = CartpoleEnv(frame_skip=10)
    env = NoisyObservationEnv(inner_env, obs_noise=5.)
    a = env.action_space.sample()
    a_copy = a.copy()
    env.step(a)
    # Compare element-wise so an in-place modification is actually detected.
    self.assertTrue((a == a_copy).all())
def run_task(v):
    env = TheanoEnv(normalize(CartpoleEnv()))

    policy = GaussianMLPPolicy(
        env_spec=env.spec,
        # The neural network policy should have two hidden layers,
        # each with 32 hidden units.
        hidden_sizes=(32, 32))

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=100,
        n_itr=40,
        discount=0.99,
        step_size=v["step_size"],
        # plot=True enables live plotting; the experiment launcher must also
        # be started with plot=True.
        plot=True,
    )
    algo.train()
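# This run_task(v) variant reads v["step_size"], so the launcher has to supply
# a variant dict. A hedged sketch of a step-size sweep, assuming garage's
# VariantGenerator and run_experiment APIs (the exact import path and the
# variant keyword are assumptions, not taken from the source):
from garage.misc.instrument import VariantGenerator, run_experiment

vg = VariantGenerator()
vg.add("step_size", [0.01, 0.05, 0.1])
for variant in vg.variants():
    run_experiment(
        run_task,
        variant=variant,
        n_parallel=1,
        snapshot_mode="last",
        seed=1,
    )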
def test_does_not_modify_action(self):
    inner_env = CartpoleEnv(obs_noise=5.)
    env = OcclusionEnv(inner_env, [1])
    a = env.action_space.sample()
    # Copy the action so the comparison below can detect in-place changes;
    # assigning a_copy = a would only alias the same array.
    a_copy = a.copy()
    env.reset()
    env.step(a)
    self.assertTrue((a == a_copy).all())
def test_pickleable(self):
    inner_env = CartpoleEnv(obs_noise=5.)
    env = SlidingMemEnv(inner_env, n_steps=10)
    round_trip = pickle.loads(pickle.dumps(env))
    assert round_trip
    assert round_trip.n_steps == env.n_steps
    assert round_trip.env.obs_noise == env.env.obs_noise
    step_env(round_trip)
def test_does_not_modify_action(self):
    inner_env = CartpoleEnv(obs_noise=5.)
    env = SlidingMemEnv(inner_env, n_steps=10)
    a = env.action_space.sample()
    a_copy = a.copy()
    env.reset()
    env.step(a)
    # Compare element-wise so an in-place modification is actually detected.
    self.assertTrue((a == a_copy).all())
def test_pickleable(self):
    inner_env = CartpoleEnv(obs_noise=5.)
    env = NormalizedEnv(inner_env, scale_reward=10.)
    round_trip = pickle.loads(pickle.dumps(env))
    assert round_trip
    assert round_trip._scale_reward == env._scale_reward
    assert round_trip.env.obs_noise == env.env.obs_noise
    step_env(round_trip)
def test_does_not_modify_action(self):
    inner_env = CartpoleEnv(obs_noise=5.)
    env = NormalizedEnv(inner_env, scale_reward=10.)
    a = env.action_space.sample()
    # Copy the action so the comparison below can detect in-place changes;
    # assigning a_copy = a would only alias the same array.
    a_copy = a.copy()
    env.reset()
    env.step(a)
    self.assertTrue((a == a_copy).all())
def test_pickleable(self):
    inner_env = CartpoleEnv(frame_skip=10)
    env = DelayedActionEnv(inner_env, action_delay=10)
    round_trip = pickle.loads(pickle.dumps(env))
    assert round_trip
    assert round_trip.action_delay == env.action_delay
    assert round_trip.env.frame_skip == env.env.frame_skip
    step_env(round_trip)
def test_pickleable(self):
    inner_env = CartpoleEnv(frame_skip=10)
    env = NoisyObservationEnv(inner_env, obs_noise=5.)
    round_trip = pickle.loads(pickle.dumps(env))
    assert round_trip
    assert round_trip.obs_noise == env.obs_noise
    assert round_trip.env.frame_skip == env.env.frame_skip
    step_env(round_trip)
def test_does_not_modify_action(self):
    inner_env = CartpoleEnv(frame_skip=10)
    env = DelayedActionEnv(inner_env, action_delay=10)
    env.reset()
    a = env.action_space.sample()
    a_copy = a.copy()
    env.reset()
    env.step(a)
    # Compare element-wise so an in-place modification is actually detected.
    self.assertTrue((a == a_copy).all())
def test_baseline(self, baseline_cls):
    env = TheanoEnv(CartpoleEnv())
    policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(6, ))
    baseline = baseline_cls(env_spec=env.spec)
    algo = VPG(
        env=env,
        policy=policy,
        baseline=baseline,
        n_itr=1,
        batch_size=1000,
        max_path_length=100)
    algo.train()
def test_adaptive_std():
    """
    Checks if the adaptive_std parameter works.
    """
    env = TheanoEnv(CartpoleEnv())
    policy = GaussianMLPPolicy(env_spec=env.spec, adaptive_std=True)
    baseline = ZeroBaseline(env_spec=env.spec)
    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=100,
        n_itr=1)
    algo.train()
def test_issue_3():
    """
    As reported in https://github.com/garage/garage/issues/3, the
    adaptive_std parameter was not functioning properly.
    """
    env = CartpoleEnv()
    policy = GaussianMLPPolicy(env_spec=env.spec, adaptive_std=True)
    baseline = ZeroBaseline(env_spec=env.spec)
    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=100,
        n_itr=1)
    algo.train()
def run_task(*_):
    env = normalize(CartpoleEnv())
    policy = DummyPolicy(env_spec=env)
    baseline = LinearFeatureBaseline(env_spec=env)
    algo = InstrumentedNOP(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=100,
        n_itr=4,
        discount=0.99,
        step_size=0.01,
        plot=True)
    algo.train()
def run_task(*_):
    env = TheanoEnv(normalize(CartpoleEnv()))
    policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(32, 32))
    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algo = InstrumentedTRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=100,
        n_itr=4,
        discount=0.99,
        step_size=0.01,
        plot=True)
    algo.train()
def run_task(*_): """Wrap ERWR training task in the run_task function.""" env = TfEnv(normalize(CartpoleEnv())) policy = GaussianMLPPolicy(name="policy", env_spec=env.spec, hidden_sizes=(32, 32)) baseline = LinearFeatureBaseline(env_spec=env.spec) algo = ERWR(env=env, policy=policy, baseline=baseline, batch_size=10000, max_path_length=100, n_itr=40, discount=0.99) algo.train()
def run_task(*_):
    env = TheanoEnv(normalize(CartpoleEnv()))
    policy = GaussianGRUPolicy(env_spec=env.spec)
    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=100,
        n_itr=10,
        discount=0.99,
        step_size=0.01,
        optimizer=ConjugateGradientOptimizer(
            hvp_approach=FiniteDifferenceHvp(base_eps=1e-5)))
    algo.train()
def test_ddpg(self):
    env = TheanoEnv(CartpoleEnv())
    policy = DeterministicMLPPolicy(env.spec)
    qf = ContinuousMLPQFunction(env.spec)
    es = OUStrategy(env.spec)
    algo = DDPG(
        env=env,
        policy=policy,
        qf=qf,
        es=es,
        n_epochs=1,
        epoch_length=100,
        batch_size=32,
        min_pool_size=50,
        replay_pool_size=1000,
        eval_samples=100,
    )
    algo.train()
def run_task(v):
    env = normalize(CartpoleEnv())
    policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(32, 32))
    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=100,
        n_itr=40,
        discount=0.99,
        step_size=v["step_size"],
        # plot=True,
    )
    algo.train()
def run_task(*_): """Wrap VPG training task in the run_task function.""" env = TfEnv(normalize(CartpoleEnv())) policy = GaussianMLPPolicy( name="policy", env_spec=env.spec, hidden_sizes=(32, 32)) baseline = LinearFeatureBaseline(env_spec=env.spec) algo = VPG( env=env, policy=policy, baseline=baseline, batch_size=10000, max_path_length=100, n_itr=100, discount=0.99, optimizer_args=dict(tf_optimizer_args=dict(learning_rate=0.01, ))) algo.train()
def test_ddpg(self):
    env = TheanoEnv(CartpoleEnv())
    policy = DeterministicMLPPolicy(env.spec)
    qf = ContinuousMLPQFunction(env.spec)
    es = OUStrategy(env.spec)
    replay_buffer = SimpleReplayBuffer(
        env_spec=env.spec,
        size_in_transitions=int(1000),
        time_horizon=100)
    algo = DDPG(
        env=env,
        policy=policy,
        qf=qf,
        es=es,
        pool=replay_buffer,
        n_epochs=1,
        epoch_length=100,
        batch_size=32,
        min_pool_size=50,
        eval_samples=100,
    )
    algo.train()
def run_task(*_):
    env = TheanoEnv(normalize(CartpoleEnv()))
    policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(32, 32))
    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=100,
        n_itr=1000,
        discount=0.99,
        step_size=0.01,
        # Uncomment both lines (this and the plot parameter below) to enable
        # plotting
        # plot=True,
    )
    algo.train()
def test_erwr_cartpole(self):
    """Test ERWR with Cartpole environment."""
    logger.reset()
    env = TfEnv(normalize(CartpoleEnv()))
    policy = GaussianMLPPolicy(
        name="policy", env_spec=env.spec, hidden_sizes=(32, 32))
    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algo = ERWR(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=10000,
        max_path_length=100,
        n_itr=10,
        discount=0.99)
    last_avg_ret = algo.train(sess=self.sess)
    assert last_avg_ret > 100
def test_tnpg_cartpole(self):
    """Test TNPG with Cartpole environment."""
    logger.reset()
    env = TfEnv(normalize(CartpoleEnv()))
    policy = GaussianMLPPolicy(
        name="policy", env_spec=env.spec, hidden_sizes=(32, 32))
    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algo = TNPG(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=10000,
        max_path_length=100,
        n_itr=10,
        discount=0.99,
        optimizer_args=dict(reg_coeff=5e-2))
    last_avg_ret = algo.train(sess=self.sess)
    assert last_avg_ret > 40
def test_gaussian_policies(self, policy_cls):
    logger._tensorboard = TensorBoardOutput()
    env = TfEnv(normalize(CartpoleEnv()))
    policy = policy_cls(name="policy", env_spec=env.spec)
    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=100,
        n_itr=1,
        discount=0.99,
        step_size=0.01,
        plot=True,
        optimizer=ConjugateGradientOptimizer,
        optimizer_args=dict(hvp_approach=FiniteDifferenceHvp(base_eps=1e-5)),
    )
    algo.train(sess=self.sess)
from garage.baselines import LinearFeatureBaseline
from garage.envs import normalize
from garage.envs.box2d import CartpoleEnv
from garage.theano.algos import TRPO
from garage.theano.envs import TheanoEnv
from garage.theano.policies import GaussianMLPPolicy

env = TheanoEnv(normalize(CartpoleEnv()))
policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(32, 32))
baseline = LinearFeatureBaseline(env_spec=env.spec)
algo = TRPO(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=4000,
    max_path_length=100,
    n_itr=40,
    discount=0.99,
    step_size=0.01,
    # plot=True
)
algo.train()
from garage.baselines import LinearFeatureBaseline
from garage.envs import normalize
from garage.envs.box2d import CartpoleEnv
from garage.tf.algos import TRPO
import garage.tf.core.layers as L
from garage.tf.envs import TfEnv
from garage.tf.optimizers import ConjugateGradientOptimizer, FiniteDifferenceHvp
from garage.tf.policies import GaussianLSTMPolicy

env = TfEnv(normalize(CartpoleEnv()))

policy = GaussianLSTMPolicy(
    name="policy",
    env_spec=env.spec,
    lstm_layer_cls=L.TfBasicLSTMLayer,
    # gru_layer_cls=L.GRULayer,
)

baseline = LinearFeatureBaseline(env_spec=env.spec)

algo = TRPO(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=4000,
    max_path_length=100,
    n_itr=10,
    discount=0.99,
    step_size=0.01,
    optimizer=ConjugateGradientOptimizer(
        hvp_approach=FiniteDifferenceHvp(base_eps=1e-5)))
# The original snippet stops after constructing the algorithm; training is
# started the same way as in the other examples.
algo.train()