コード例 #1
0
ファイル: trpo_sim_sawyer_push.py プロジェクト: gntoni/garage
def run_task(*_):
    """Train TRPO on the simulated push environment.

    Registers the ROS node ``trpo_sim_sawyer_push_exp``, constructs and
    initializes a ``PushEnv`` with ``simulated=True``, then trains a
    ``GaussianMLPPolicy`` against a ``LinearFeatureBaseline`` using TRPO.

    Any positional arguments are accepted and ignored.
    """
    goal_position = np.array([0.6, -0.1, 0.80])

    # The ROS node is registered before the environment is constructed.
    rospy.init_node('trpo_sim_sawyer_push_exp', anonymous=True)

    sawyer_env = PushEnv(
        goal_position,
        initial_joint_pos=INITIAL_ROBOT_JOINT_POS,
        simulated=True,
    )
    # Have ROS invoke the environment's own shutdown hook when it shuts down.
    rospy.on_shutdown(sawyer_env.shutdown)
    sawyer_env.initialize()

    mlp_policy = GaussianMLPPolicy(
        env_spec=spec(sawyer_env),
        hidden_sizes=(32, 32),
    )
    value_baseline = LinearFeatureBaseline(env_spec=spec(sawyer_env))

    # Collect the TRPO settings in one place, then build the algorithm and
    # start training.
    trpo_settings = dict(
        env=sawyer_env,
        policy=mlp_policy,
        baseline=value_baseline,
        batch_size=4000,
        max_path_length=100,
        n_itr=100,
        discount=0.99,
        step_size=0.01,
        plot=False,
        force_batch_sampler=True,
    )
    TRPO(**trpo_settings).train()
コード例 #2
0
def run_task(*_):
    """Train TRPO on the simulated reacher environment.

    Initializes ``moveit_commander`` with the process arguments, registers
    the ROS node ``trpo_sim_sawyer_reacher_exp``, constructs and initializes
    a ``ReacherEnv`` with ``simulated=True``, and finally trains a
    ``GaussianMLPPolicy`` against a ``LinearFeatureBaseline`` using TRPO.

    Any positional arguments are accepted and ignored.
    """
    target = np.array([0.6, -0.1, 0.40])

    # moveit_commander is set up first, then the ROS node is registered.
    moveit_commander.roscpp_initialize(sys.argv)
    rospy.init_node('trpo_sim_sawyer_reacher_exp', anonymous=True)

    reacher = ReacherEnv(
        target,
        initial_joint_pos=INITIAL_ROBOT_JOINT_POS,
        simulated=True,
    )
    # Have ROS invoke the environment's own shutdown hook when it shuts down.
    rospy.on_shutdown(reacher.shutdown)
    reacher.initialize()

    actor = GaussianMLPPolicy(env_spec=spec(reacher), hidden_sizes=(32, 32))
    critic_baseline = LinearFeatureBaseline(env_spec=spec(reacher))

    trainer = TRPO(
        env=reacher,
        policy=actor,
        baseline=critic_baseline,
        batch_size=4000,
        max_path_length=100,
        n_itr=100,
        discount=0.99,
        step_size=0.01,
        plot=False,
        force_batch_sampler=True,
    )
    trainer.train()
コード例 #3
0
def run_pick_and_place(*_):
    """Train TRPO on ``PickAndPlaceEnv``.

    Builds the environment from a fixed goal array, pairs a
    ``GaussianMLPPolicy`` with a ``LinearFeatureBaseline``, and runs TRPO
    with plotting enabled.

    Any positional arguments are accepted and ignored.
    """
    goal_position = np.array([0.6, -0.1, 0.80])
    task_env = PickAndPlaceEnv(goal_position)

    mlp_policy = GaussianMLPPolicy(
        env_spec=spec(task_env),
        hidden_sizes=(32, 32),
    )
    value_baseline = LinearFeatureBaseline(env_spec=spec(task_env))

    # Gather the TRPO settings, then build the algorithm and train.
    trpo_settings = dict(
        env=task_env,
        policy=mlp_policy,
        baseline=value_baseline,
        batch_size=4000,
        max_path_length=2000,
        n_itr=1000,
        discount=0.99,
        step_size=0.01,
        plot=True,
        force_batch_sampler=True,
    )
    TRPO(**trpo_settings).train()
コード例 #4
0
ファイル: trpo_gym_Acrobot-v1.py プロジェクト: gntoni/garage
def run_task(*_):
    """Train TRPO on the normalized gym ``Acrobot-v1`` environment.

    Uses a ``CategoricalMLPPolicy`` with a ``LinearFeatureBaseline``; the
    maximum path length is taken from ``horizon(env)``. Plotting is enabled.

    Any positional arguments are accepted and ignored.
    """
    acrobot = normalize(gym.make("Acrobot-v1"))

    discrete_policy = CategoricalMLPPolicy(
        env_spec=spec(acrobot),
        hidden_sizes=(32, 32),
    )
    value_baseline = LinearFeatureBaseline(env_spec=spec(acrobot))

    # The path-length cap is derived from the environment rather than
    # hard-coded.
    path_cap = horizon(acrobot)

    trainer = TRPO(
        env=acrobot,
        policy=discrete_policy,
        baseline=value_baseline,
        batch_size=4000,
        max_path_length=path_cap,
        n_itr=50,
        discount=0.99,
        step_size=0.01,
        plot=True,
    )
    trainer.train()
コード例 #5
0
def run_block_stacking(*_):
    """Train TRPO on ``BlockStackingEnv``.

    Pairs a ``GaussianMLPPolicy`` with a ``LinearFeatureBaseline`` and runs
    TRPO with plotting enabled.

    Any positional arguments are accepted and ignored.
    """
    stacking_env = BlockStackingEnv()

    actor = GaussianMLPPolicy(
        env_spec=spec(stacking_env),
        hidden_sizes=(32, 32),
    )
    value_baseline = LinearFeatureBaseline(env_spec=spec(stacking_env))

    trainer = TRPO(
        env=stacking_env,
        policy=actor,
        baseline=value_baseline,
        batch_size=4000,
        max_path_length=2000,
        n_itr=1000,
        discount=0.99,
        step_size=0.01,
        plot=True,
        force_batch_sampler=True,
    )
    trainer.train()
コード例 #6
0
def run_task(vv):
    """Train TRPO on the normalized gym ``HalfCheetah-v1`` environment.

    The environment is wrapped in ``TfEnv``; a ``GaussianMLPPolicy`` named
    ``"policy"`` is trained against a ``LinearFeatureBaseline``.

    Args:
        vv: Mapping of variant values; ``vv["step_size"]`` is passed to TRPO
            as ``step_size``.
    """
    cheetah = TfEnv(normalize(gym.make('HalfCheetah-v1')))

    actor = GaussianMLPPolicy(
        env_spec=spec(cheetah),
        hidden_sizes=(32, 32),
        name="policy",
    )
    value_baseline = LinearFeatureBaseline(env_spec=spec(cheetah))

    # Looked up only after the environment, policy and baseline exist, so a
    # missing key surfaces at the same point as it would inside the call.
    kl_step = vv["step_size"]

    # Plotting is left off here; add ``plot=True`` to these settings to turn
    # it on (the experiment launcher may need plotting enabled as well --
    # TODO confirm).
    trpo_settings = dict(
        env=cheetah,
        policy=actor,
        baseline=value_baseline,
        batch_size=4000,
        max_path_length=100,
        n_itr=40,
        discount=0.99,
        step_size=kl_step,
    )
    TRPO(**trpo_settings).train()
コード例 #7
0
ファイル: trpo_gym_tf_cartpole.py プロジェクト: gntoni/garage
from garage.baselines import LinearFeatureBaseline
from garage.envs.util import spec
from garage.misc.instrument import run_experiment
from garage.misc.instrument import stub
from garage.tf.algos import TRPO
from garage.tf.envs import TfEnv
from garage.tf.policies import CategoricalMLPPolicy

# Must run before any of the constructor calls below.
# NOTE(review): presumably this swaps the imported classes for stubs so the
# calls are recorded for the experiment launcher instead of executed
# eagerly -- confirm against garage.misc.instrument.stub.
stub(globals())

# Need to wrap in a tf environment and force_reset to true
# see https://github.com/openai/rllab/issues/87#issuecomment-282519288
# NOTE(review): `gym` is not imported in the visible import block, and
# `force_reset` is not passed to TfEnv here -- confirm both against the
# full file.
env = TfEnv(gym.make("CartPole-v0"))

# Categorical MLP policy named "policy", built from the environment's spec.
policy = CategoricalMLPPolicy(name="policy",
                              env_spec=spec(env),
                              hidden_sizes=(32, 32))

# Linear feature baseline built from the same environment's spec.
baseline = LinearFeatureBaseline(env_spec=spec(env))

# TRPO configuration; training itself is not started in this span.
# NOTE(review): `run_experiment` is imported but unused here, so the launch
# call presumably follows after this block -- confirm.
algo = TRPO(
    env=env,
    policy=policy,
    baseline=baseline,
    batch_size=4000,
    max_path_length=200,
    n_itr=120,
    discount=0.99,
    step_size=0.01,
)