def run_task(v):
    env = TfEnv(normalize(CartpoleEnv()))

    policy = GaussianMLPPolicy(
        env_spec=env.spec,
        # The neural network policy should have two hidden layers,
        # each with 32 hidden units.
        hidden_sizes=(32, 32))

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=100,
        n_itr=40,
        discount=0.99,
        step_size=v["step_size"],
        # plot=True here (together with plot=True in the run_experiment
        # call) enables live plotting.
        plot=True,
    )
    algo.train()


def run_task(v): """ We wrap the main training loop in the run_task function so that run_experiment can easily execute variants of the experiment on different machines """ env = TfEnv(env_name="CartPole-v1") policy = CategoricalMLPPolicy( env_spec=env.spec, # The neural network policy should have two hidden layers, # each with 32 hidden units. hidden_sizes=(32, 32)) baseline = LinearFeatureBaseline(env_spec=env.spec) algo = TRPO( env=env, policy=policy, baseline=baseline, batch_size=4000, max_path_length=100, n_itr=40, discount=0.99, step_size=v["step_size"], # Uncomment both lines (this and the plot parameter below) to enable # plotting plot=True, ) algo.train()
def run_task(*_):
    env = TfEnv(
        normalize(
            GridworldGathererEnv(
                plot={
                    'visitation': {
                        # 'save': '~/garage/data/local/gridworld/instant-run',
                        'save': False,
                        'live': True
                    }
                })))

    policy = CategoricalMLPPolicy(env_spec=env.spec, hidden_sizes=(64, 64))

    baseline = GaussianMLPBaseline(env_spec=env.spec)

    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=5000,
        max_path_length=100,
        n_itr=50,
        discount=0.99,
        step_size=0.01,
    )

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as session:
        algo.train(sess=session)


def test_dm_control_tf_policy(self):
    task = ALL_TASKS[0]

    with self.graph.as_default():
        env = TfEnv(DmControlEnv.from_suite(*task))

        policy = GaussianMLPPolicy(
            env_spec=env.spec,
            hidden_sizes=(32, 32),
        )

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        algo = TRPO(
            env=env,
            policy=policy,
            baseline=baseline,
            batch_size=10,
            max_path_length=5,
            n_itr=1,
            discount=0.99,
            step_size=0.01,
        )
        algo.train()

        env.close()


def run_task(*_):
    env = TfEnv(
        normalize(
            MinibotEnv(
                use_maps=[0, 1],  # or 'all'
                discretized=True)))

    policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(64, 64))

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=5000,
        max_path_length=100,
        n_itr=15,
        discount=0.99,
        step_size=0.01,
        plot=plot,  # `plot` is a module-level flag defined outside this function
        pause_for_plot=False)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as session:
        algo.train(sess=session)


def run_task(*_):
    env = TfEnv(
        normalize(
            OneHotMultiTaskEnv(
                task_env_cls=PR2ArmClockEnv,
                task_args=TASK_ARGS,
                task_kwargs=TASK_KWARGS)))

    policy = GaussianMLPPolicy(
        name="policy", env_spec=env.spec, hidden_sizes=(32, 32))

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=100,
        n_itr=400000000,  # effectively train until interrupted
        discount=0.99,
        step_size=0.01,
        plot=True,
    )
    algo.train()


def run_garage(env, seed, log_dir):
    """
    Create garage model and training.

    Replace TRPO with the algorithm you want to run.

    :param env: Environment of the task.
    :param seed: Random seed for the trial.
    :param log_dir: Log dir path.
    :return: Path of the tabular log file (progress.csv).
    """
    ext.set_seed(seed)

    with tf.Graph().as_default():
        env = TfEnv(normalize(env))

        policy = GaussianMLPPolicy(
            name="policy",
            env_spec=env.spec,
            hidden_sizes=(32, 32),
            hidden_nonlinearity=tf.nn.tanh,
            output_nonlinearity=None,
        )

        baseline = GaussianMLPBaseline(
            env_spec=env.spec,
            regressor_args=dict(
                hidden_sizes=(32, 32),
                use_trust_region=True,
            ),
        )

        algo = TRPO(
            env=env,
            policy=policy,
            baseline=baseline,
            batch_size=1024,
            max_path_length=100,
            n_itr=976,
            discount=0.99,
            gae_lambda=0.98,
            clip_range=0.1,
            policy_ent_coeff=0.0,
            plot=False,
        )

        # Set up logger since we are not using run_experiment
        tabular_log_file = osp.join(log_dir, "progress.csv")
        garage_logger.add_tabular_output(tabular_log_file)
        garage_logger.set_tensorboard_dir(log_dir)

        algo.train()

        garage_logger.remove_tabular_output(tabular_log_file)

        return tabular_log_file


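# A hypothetical driver for the benchmark helper above; the environment id,
# seed, and log directory are illustrative values, not taken from the
# original code.
import gym

env = gym.make("HalfCheetah-v1")
log_file = run_garage(env, seed=1, log_dir="/tmp/trpo_garage")
print("Progress written to", log_file)

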
def test_trpo_pendulum(self):
    """Test TRPO with the InvertedDoublePendulum-v2 environment."""
    logger.reset()
    env = TfEnv(normalize(gym.make("InvertedDoublePendulum-v2")))
    policy = GaussianMLPPolicy(
        env_spec=env.spec,
        hidden_sizes=(64, 64),
        hidden_nonlinearity=tf.nn.tanh,
        output_nonlinearity=None,
    )
    baseline = GaussianMLPBaseline(
        env_spec=env.spec,
        regressor_args=dict(hidden_sizes=(32, 32)),
    )
    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=2048,
        max_path_length=100,
        n_itr=10,
        discount=0.99,
        gae_lambda=0.98,
        policy_ent_coeff=0.0,
        plot=False,
    )
    last_avg_ret = algo.train(sess=self.sess)
    assert last_avg_ret > 50
    env.close()


def run_task(v):
    env = normalize(PointEnv())

    policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(32, 32))

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=100,
        n_itr=40,
        discount=0.99,
        step_size=v['step_size'],
        # plot=True,
    )
    algo.train()


def run_task(*_): env = TfEnv(normalize(gym.make("Pendulum-v0"))) policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(32, 32)) baseline = LinearFeatureBaseline(env_spec=env.spec) algo = TRPO( env=env, policy=policy, baseline=baseline, batch_size=4000, max_path_length=env.max_episode_steps, n_itr=50, discount=0.99, step_size=0.01, plot=True, ) algo.train()
def run_task(*_):
    env = TfEnv(normalize(gym.make('MountainCar-v0')))

    policy = CategoricalMLPPolicy(env_spec=env.spec, hidden_sizes=(32, 32))

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=env.max_episode_steps,
        n_itr=150,
        discount=0.99,
        step_size=0.1,
        plot=True,
    )
    algo.train()


def run_task(*_):
    env = FlatGoalEnv(SawyerPickEnv(), obs_keys=["state_observation"])
    env = TfEnv(normalize(env))

    policy = GaussianMLPPolicy(
        name="policy", env_spec=env.spec, hidden_sizes=(32, 32))

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=500,
        n_itr=500,
        discount=0.99,
        step_size=0.01,
        plot=True)
    algo.train()


def run_task(*_): """Wrap TRPO training task in the run_task function.""" env = TfEnv(normalize(CartpoleEnv())) policy = GaussianMLPPolicy(name="policy", env_spec=env.spec, hidden_sizes=(32, 32)) baseline = LinearFeatureBaseline(env_spec=env.spec) algo = TRPO(env=env, policy=policy, baseline=baseline, batch_size=4000, max_path_length=100, n_itr=100, discount=0.99, max_kl_step=0.01, plot=False) algo.train()
def run_task(v):
    v = SimpleNamespace(**v)

    # Environment
    env = FlatTorqueReacher(
        fix_goal=True,
        fixed_goal=GOALS[0],
        reward_type="hand_distance",
        hand_distance_completion_bonus=0.,
        torque_limit_pct=0.2,
        indicator_threshold=0.03,
        velocity_penalty_coeff=0.01,
        action_scale=10.0,
        hide_goal_pos=True,
    )
    env = TfEnv(normalize(env))

    # Policy
    policy = GaussianMLPPolicy(
        name="policy",
        env_spec=env.spec,
        hidden_sizes=(64, 32),
        init_std=v.policy_init_std,
    )

    baseline = GaussianMLPBaseline(env_spec=env.spec)

    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=v.batch_size,  # 4096
        max_path_length=v.max_path_length,
        n_itr=1000,
        discount=0.99,
        step_size=0.01,
        plot=True,
        # optimizer_args=dict(max_grad_norm=0.5),
    )
    algo.train()


def run_task(*_):
    env = TfEnv(normalize(PointEnv(goal=(-1, 0))))

    policy = GaussianMLPPolicy(
        name="policy", env_spec=env.spec, hidden_sizes=(32, 32))

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=100,
        n_itr=100,
        discount=0.99,
        step_size=0.01,
        plot=False,
        force_batch_sampler=True,
    )
    algo.train()


def test_categorical_policies(self, policy_cls):
    env = TfEnv(normalize(gym.make("CartPole-v0")))

    policy = policy_cls(name="policy", env_spec=env.spec)

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=100,
        n_itr=1,
        discount=0.99,
        step_size=0.01,
        plot=True,
        optimizer=ConjugateGradientOptimizer,
        optimizer_args=dict(
            hvp_approach=FiniteDifferenceHvp(base_eps=1e-5)),
    )
    algo.train(sess=self.sess)


def test_gaussian_policies(self, policy_cls):
    logger._tensorboard = TensorBoardOutput()

    env = TfEnv(normalize(CartpoleEnv()))

    policy = policy_cls(name="policy", env_spec=env.spec)

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=100,
        n_itr=1,
        discount=0.99,
        step_size=0.01,
        plot=True,
        optimizer=ConjugateGradientOptimizer,
        optimizer_args=dict(
            hvp_approach=FiniteDifferenceHvp(base_eps=1e-5)),
    )
    algo.train(sess=self.sess)


def run_task(vv):
    env = TfEnv(normalize(gym.make('HalfCheetah-v1')))

    policy = GaussianMLPPolicy(
        env_spec=env.spec, hidden_sizes=(32, 32), name="policy")

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=100,
        n_itr=40,
        discount=0.99,
        step_size=vv["step_size"],
        # Uncomment both lines (this and the plot parameter below) to enable
        # plotting
        # plot=True,
    )
    algo.train()


import gym

from garage.baselines import LinearFeatureBaseline
from garage.experiment import run_experiment
from garage.tf.algos import TRPO
from garage.tf.envs import TfEnv
from garage.tf.policies import CategoricalMLPPolicy


def run_task(*_):
    # Need to wrap in a tf environment and force_reset to true
    # see https://github.com/openai/rllab/issues/87#issuecomment-282519288
    env = TfEnv(gym.make("CartPole-v0"))

    policy = CategoricalMLPPolicy(
        name="policy", env_spec=env.spec, hidden_sizes=(32, 32))

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=200,
        n_itr=120,
        discount=0.99,
        max_kl_step=0.01,
    )
    algo.train()


# run_experiment expects a callable, so pass run_task itself rather than
# the result of calling algo.train().
run_experiment(run_task, n_parallel=1, snapshot_mode="last", seed=1)


from garage.baselines import LinearFeatureBaseline
from garage.envs import normalize
from garage.misc.instrument import run_experiment
from garage.tf.algos import TRPO
from garage.tf.envs import TfEnv
from garage.tf.policies import GaussianMLPPolicy

from sandbox.embed2learn.envs.mujoco import PR2ArmEnv

env = TfEnv(normalize(PR2ArmEnv()))

policy = GaussianMLPPolicy(
    name="policy",
    env_spec=env.spec,
    hidden_sizes=(32, 32),
)

baseline = LinearFeatureBaseline(env_spec=env.spec)

algo = TRPO(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=4000,
    max_path_length=100,
    n_itr=100,
    discount=0.99,
    step_size=0.01,
    plot=True,
)
algo.train()