def test_cma_es_cartpole(self):
    """Test CMAES with Cartpole-v1 environment."""
    with LocalTFRunner() as runner:
        env = TfEnv(env_name='CartPole-v1')
        policy = CategoricalMLPPolicy(name='policy',
                                      env_spec=env.spec,
                                      hidden_sizes=(32, 32))
        baseline = LinearFeatureBaseline(env_spec=env.spec)
        runner.initialize_tf_vars()
        num_candidates = 20
        algo = CMAES(env_spec=env.spec,
                     policy=policy,
                     baseline=baseline,
                     max_path_length=100,
                     n_samples=num_candidates)
        runner.setup(algo, env, sampler_cls=OnPolicyVectorizedSampler)
        # One epoch cycle per CMA candidate, hence n_epoch_cycles == n_samples.
        runner.train(n_epochs=1,
                     batch_size=1000,
                     n_epoch_cycles=num_candidates)
        # No assertion on return because CMAES is not stable.
        env.close()
def cma_es_cartpole(ctxt=None, seed=1):
    """Train CMA_ES with Cartpole-v1 environment.

    Args:
        ctxt (garage.experiment.ExperimentContext): Experiment configuration
            handed to LocalRunner for creating the snapshotter.
        seed (int): Seed for the random number generator, giving a
            deterministic run.

    """
    set_seed(seed)
    with LocalTFRunner(ctxt) as runner:
        env = TfEnv(env_name='CartPole-v1')
        policy = CategoricalMLPPolicy(name='policy',
                                      env_spec=env.spec,
                                      hidden_sizes=(32, 32))
        baseline = LinearFeatureBaseline(env_spec=env.spec)
        num_candidates = 20
        algo = CMAES(env_spec=env.spec,
                     policy=policy,
                     baseline=baseline,
                     max_path_length=100,
                     n_samples=num_candidates)
        runner.setup(algo, env, sampler_cls=OnPolicyVectorizedSampler)
        runner.train(n_epochs=100, batch_size=1000)
def cma_es_cartpole(ctxt=None, seed=1):
    """Train CMA_ES with Cartpole-v1 environment.

    Args:
        ctxt (garage.experiment.ExperimentContext): Experiment configuration
            handed to Trainer for creating the snapshotter.
        seed (int): Seed for the random number generator, giving a
            deterministic run.

    """
    set_seed(seed)
    with TFTrainer(ctxt) as trainer:
        env = GymEnv('CartPole-v1')
        policy = CategoricalMLPPolicy(name='policy',
                                      env_spec=env.spec,
                                      hidden_sizes=(32, 32))
        num_candidates = 20
        sampler = LocalSampler(
            agents=policy,
            envs=env,
            max_episode_length=env.spec.max_episode_length,
            is_tf_worker=True)
        algo = CMAES(env_spec=env.spec,
                     policy=policy,
                     sampler=sampler,
                     n_samples=num_candidates)
        trainer.setup(algo, env)
        trainer.train(n_epochs=100, batch_size=1000)
def run_task(snapshot_config, *_):
    """Train CMA_ES with Cartpole-v1 environment.

    Args:
        snapshot_config (garage.experiment.SnapshotConfig): Snapshot
            configuration handed to LocalRunner for creating the snapshotter.
        *_ (object): Ignored by this function.

    """
    with LocalTFRunner(snapshot_config=snapshot_config) as runner:
        env = TfEnv(env_name='CartPole-v1')
        policy = CategoricalMLPPolicy(name='policy',
                                      env_spec=env.spec,
                                      hidden_sizes=(32, 32))
        baseline = LinearFeatureBaseline(env_spec=env.spec)
        num_candidates = 20
        algo = CMAES(env_spec=env.spec,
                     policy=policy,
                     baseline=baseline,
                     max_path_length=100,
                     n_samples=num_candidates)
        runner.setup(algo, env, sampler_cls=OnPolicyVectorizedSampler)
        runner.train(n_epochs=100, batch_size=1000)
def test_cma_es_cartpole(self):
    """Test CMAES with Cartpole-v1 environment."""
    with TFTrainer(snapshot_config) as trainer:
        env = GymEnv('CartPole-v1')
        policy = CategoricalMLPPolicy(name='policy',
                                      env_spec=env.spec,
                                      hidden_sizes=(32, 32))
        num_candidates = 20
        sampler = LocalSampler(
            agents=policy,
            envs=env,
            max_episode_length=env.spec.max_episode_length,
            is_tf_worker=True)
        algo = CMAES(env_spec=env.spec,
                     policy=policy,
                     sampler=sampler,
                     n_samples=num_candidates)
        trainer.setup(algo, env)
        trainer.train(n_epochs=1, batch_size=1000)
        # No assertion on return because CMAES is not stable.
        env.close()
def run_task(snapshot_config, *_):
    """Train CMA-ES on the mass-spring hardware-as-action environment.

    Builds a composite computational/mechanical policy, archives the project
    sources next to the snapshots, and runs CMA-ES with a linear-feature
    baseline using the algorithm/training kwargs from the module-level
    ``params`` object.

    Args:
        snapshot_config (garage.experiment.SnapshotConfig): Snapshot
            configuration used by LocalRunner to create the snapshotter.
        *_ (object): Ignored by this function.

    """
    with LocalTFRunner(snapshot_config=snapshot_config) as runner:
        env = TfEnv(MassSpringEnv_OptK_HwAsAction(params))
        # NOTE(review): reaches into the runner's private attributes to find
        # the snapshot directory — confirm no public accessor exists.
        zip_project(log_dir=runner._snapshotter._snapshot_dir)
        # Computational half of the policy: a single-output tanh MLP.
        comp_policy_model = MLPModel(
            output_dim=1,
            hidden_sizes=params.comp_policy_network_size,
            hidden_nonlinearity=tf.nn.tanh,
            output_nonlinearity=tf.nn.tanh,
        )
        mech_policy_model = MechPolicyModel_OptK_HwAsAction(params)
        policy = CompMechPolicy_OptK_HwAsAction(
            name='comp_mech_policy',
            env_spec=env.spec,
            comp_policy_model=comp_policy_model,
            mech_policy_model=mech_policy_model)
        baseline = LinearFeatureBaseline(env_spec=env.spec)
        algo = CMAES(env_spec=env.spec,
                     policy=policy,
                     baseline=baseline,
                     **params.cmaes_algo_kwargs)
        runner.setup(algo, env)
        runner.train(**params.cmaes_train_kwargs)
def run_task(snapshot_config, *_):
    """Train CMA_ES with Cartpole-v1 environment.

    Args:
        snapshot_config (garage.experiment.SnapshotConfig): The snapshot
            configuration used by LocalRunner to create the snapshotter.
        *_ (object): Ignored by this function.

    """
    with LocalTFRunner(snapshot_config=snapshot_config) as runner:
        env = TfEnv(env_name='CartPole-v1')
        policy = CategoricalMLPPolicy(name='policy',
                                      env_spec=env.spec,
                                      hidden_sizes=(32, 32))
        baseline = LinearFeatureBaseline(env_spec=env.spec)
        # Number of CMA-ES candidate solutions evaluated per generation.
        n_samples = 20
        algo = CMAES(env_spec=env.spec,
                     policy=policy,
                     baseline=baseline,
                     max_path_length=100,
                     n_samples=n_samples)
        runner.setup(algo, env, sampler_cls=OnPolicyVectorizedSampler)
        # NOTE: make sure that n_epoch_cycles == n_samples !
        runner.train(n_epochs=100, batch_size=1000, n_epoch_cycles=n_samples)
def test_cma_es_cartpole(self):
    """Test CMAES with Cartpole-v1 environment."""
    with TFTrainer(snapshot_config) as trainer:
        env = GymEnv('CartPole-v1')
        policy = CategoricalMLPPolicy(name='policy',
                                      env_spec=env.spec,
                                      hidden_sizes=(32, 32))
        baseline = LinearFeatureBaseline(env_spec=env.spec)
        num_candidates = 20
        algo = CMAES(env_spec=env.spec,
                     policy=policy,
                     baseline=baseline,
                     n_samples=num_candidates)
        trainer.setup(algo, env, sampler_cls=LocalSampler)
        trainer.train(n_epochs=1, batch_size=1000)
        # No assertion on return because CMAES is not stable.
        env.close()