def run_task(*_):
    """Train TRPO on the simulated Sawyer push environment."""
    goal = np.array([0.6, -0.1, 0.80])

    rospy.init_node('trpo_sim_sawyer_push_exp', anonymous=True)
    push_env = PushEnv(
        goal, initial_joint_pos=INITIAL_ROBOT_JOINT_POS, simulated=True)
    # Make sure the environment is torn down cleanly when ROS shuts down.
    rospy.on_shutdown(push_env.shutdown)
    push_env.initialize()
    env = push_env

    policy = GaussianMLPPolicy(env_spec=spec(env), hidden_sizes=(32, 32))
    baseline = LinearFeatureBaseline(env_spec=spec(env))

    trpo_kwargs = dict(
        batch_size=4000,
        max_path_length=100,
        n_itr=100,
        discount=0.99,
        step_size=0.01,
        plot=False,
        force_batch_sampler=True,
    )
    algo = TRPO(env=env, policy=policy, baseline=baseline, **trpo_kwargs)
    algo.train()
def run_task(*_):
    """Train TRPO on the simulated Sawyer reacher environment."""
    goal = np.array([0.6, -0.1, 0.40])

    # Initialize moveit_commander
    moveit_commander.roscpp_initialize(sys.argv)
    rospy.init_node('trpo_sim_sawyer_reacher_exp', anonymous=True)

    env = ReacherEnv(
        goal, initial_joint_pos=INITIAL_ROBOT_JOINT_POS, simulated=True)
    # Tear the environment down cleanly on ROS shutdown.
    rospy.on_shutdown(env.shutdown)
    env.initialize()

    policy = GaussianMLPPolicy(env_spec=spec(env), hidden_sizes=(32, 32))
    baseline = LinearFeatureBaseline(env_spec=spec(env))

    trpo_kwargs = dict(
        batch_size=4000,
        max_path_length=100,
        n_itr=100,
        discount=0.99,
        step_size=0.01,
        plot=False,
        force_batch_sampler=True,
    )
    TRPO(env=env, policy=policy, baseline=baseline, **trpo_kwargs).train()
def run_pick_and_place(*_):
    """Launch TRPO training on the pick-and-place environment."""
    goal = np.array([0.6, -0.1, 0.80])
    env = PickAndPlaceEnv(goal)

    policy = GaussianMLPPolicy(env_spec=spec(env), hidden_sizes=(32, 32))
    baseline = LinearFeatureBaseline(env_spec=spec(env))

    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        # Longer horizon than the push/reach tasks.
        max_path_length=2000,
        n_itr=1000,
        discount=0.99,
        step_size=0.01,
        plot=True,
        force_batch_sampler=True,
    )
    algo.train()
def run_task(*_):
    """Train TRPO with a categorical policy on Acrobot-v1."""
    env = normalize(gym.make("Acrobot-v1"))

    policy = CategoricalMLPPolicy(env_spec=spec(env), hidden_sizes=(32, 32))
    baseline = LinearFeatureBaseline(env_spec=spec(env))

    TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        # Use the environment's own time horizon as the path-length cap.
        max_path_length=horizon(env),
        n_itr=50,
        discount=0.99,
        step_size=0.01,
        plot=True,
    ).train()
def run_block_stacking(*_):
    """Run TRPO with block stacking."""
    env = BlockStackingEnv()

    policy = GaussianMLPPolicy(env_spec=spec(env), hidden_sizes=(32, 32))
    baseline = LinearFeatureBaseline(env_spec=spec(env))

    trpo_kwargs = dict(
        batch_size=4000,
        max_path_length=2000,
        n_itr=1000,
        discount=0.99,
        step_size=0.01,
        plot=True,
        force_batch_sampler=True,
    )
    algo = TRPO(env=env, policy=policy, baseline=baseline, **trpo_kwargs)
    algo.train()
def run_task(vv):
    """Train TRPO on HalfCheetah-v1 using the hyperparameter variant ``vv``."""
    env = TfEnv(normalize(gym.make('HalfCheetah-v1')))

    policy = GaussianMLPPolicy(
        env_spec=spec(env), hidden_sizes=(32, 32), name="policy")
    baseline = LinearFeatureBaseline(env_spec=spec(env))

    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=100,
        n_itr=40,
        discount=0.99,
        # Step size is taken from the variant so it can be swept.
        step_size=vv["step_size"],
        # Uncomment both lines (this and the plot parameter below) to enable
        # plotting
        # plot=True,
    )
    algo.train()
# Stubbed TRPO launcher for CartPole-v0 with a categorical MLP policy.
import gym  # fix: gym.make is used below but gym was never imported

from garage.baselines import LinearFeatureBaseline
from garage.envs.util import spec
from garage.misc.instrument import run_experiment
from garage.misc.instrument import stub
from garage.tf.algos import TRPO
from garage.tf.envs import TfEnv
from garage.tf.policies import CategoricalMLPPolicy

stub(globals())

# Need to wrap in a tf environment and force_reset to true
# see https://github.com/openai/rllab/issues/87#issuecomment-282519288
env = TfEnv(gym.make("CartPole-v0"))

policy = CategoricalMLPPolicy(
    name="policy", env_spec=spec(env), hidden_sizes=(32, 32))

baseline = LinearFeatureBaseline(env_spec=spec(env))

algo = TRPO(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=4000,
    max_path_length=200,
    n_itr=120,
    discount=0.99,
    step_size=0.01,
)

# NOTE(review): `algo` is constructed but never launched in this chunk — the
# sibling launchers call `algo.train()` (or pass it to `run_experiment` when
# stubbed). Confirm the launch call isn't missing or out of view.