Example 1
from multiarchy.launch import launch_local
# NOTE: the hierarchical SAC import path below is assumed, by analogy with the
# other multiarchy.baselines imports in these examples
from multiarchy.baselines.hierarchy_sac import hierarchy_sac, hierarchy_sac_variant
from gym.envs.mujoco.half_cheetah import HalfCheetahEnv

if __name__ == "__main__":

    # parameters for the learning experiment
    variant = dict(
        max_num_steps=1000000,
        logging_dir="half_cheetah/hierarchy_sac/",
        num_hierarchy_levels=2,
        time_skip=10,
        hidden_size=256,
        num_hidden_layers=2,
        reward_scale=1.0,
        discount=0.99,
        initial_alpha=1.0,
        policy_learning_rate=0.0003,
        qf_learning_rate=0.0003,
        tau=0.005,
        batch_size=256,
        max_path_length=1000,
        num_workers=10,
        num_warm_up_steps=100000,
        num_steps_per_epoch=1000,
        num_steps_per_eval=10000,
        num_epochs_per_eval=10,
        num_epochs=10000)

    # make sure that all the right parameters are here
    assert all([x in variant.keys() for x in hierarchy_sac_variant.keys()])

    # launch the experiment using ray
    launch_local(
        hierarchy_sac,
        variant,
        HalfCheetahEnv,
        num_seeds=3)
Example 2
from multiarchy.launch import launch_local
from multiarchy.baselines.ddpg import ddpg, ddpg_variant
from multiarchy.envs.pointmass_env import PointmassEnv

if __name__ == "__main__":

    # parameters for the learning experiment
    variant = dict(max_num_steps=10000,
                   logging_dir="pointmass/ddpg/",
                   hidden_size=400,
                   num_hidden_layers=2,
                   reward_scale=1.0,
                   discount=0.99,
                   policy_learning_rate=0.0003,
                   qf_learning_rate=0.0003,
                   tau=0.005,
                   exploration_noise_std=0.1,
                   batch_size=256,
                   max_path_length=10,
                   num_workers=2,
                   num_warm_up_steps=100,
                   num_steps_per_epoch=10,
                   num_steps_per_eval=100,
                   num_epochs_per_eval=10,
                   num_epochs=10000)

    # make sure that all the right parameters are here
    assert all([x in variant.keys() for x in ddpg_variant.keys()])

    # launch the experiment using ray
    launch_local(ddpg, variant, PointmassEnv, num_seeds=2)
Example 3
from multiarchy.launch import launch_local
from multiarchy.baselines.ppo import ppo, ppo_variant
from gym.envs.mujoco.half_cheetah import HalfCheetahEnv

if __name__ == "__main__":

    # parameters for the learning experiment
    variant = dict(max_path_length=1000,
                   max_num_paths=1000,
                   logging_dir="half_cheetah/ppo/",
                   hidden_size=400,
                   num_hidden_layers=2,
                   reward_scale=1.0,
                   discount=0.99,
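                   # epsilon is presumably the PPO clipping range and lamb the
                   # GAE lambda (standard PPO hyperparameters; meaning assumed)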
                   epsilon=0.1,
                   lamb=0.95,
                   off_policy_updates=10,
                   critic_updates=32,
                   policy_learning_rate=0.0001,
                   vf_learning_rate=0.001,
                   num_workers=10,
                   num_steps_per_epoch=10000,
                   num_steps_per_eval=10000,
                   num_epochs_per_eval=1,
                   num_epochs=1000)

    # make sure that all the right parameters are here
    assert all([x in variant.keys() for x in ppo_variant.keys()])

    # launch the experiment using ray
    launch_local(ppo, variant, HalfCheetahEnv, num_seeds=3)
Example 4
from multiarchy.launch import launch_local
from multiarchy.baselines.ppo import ppo, ppo_variant
from gym.envs.mujoco.hopper import HopperEnv

if __name__ == "__main__":

    # parameters for the learning experiment
    variant = dict(
        max_path_length=500,
        max_num_paths=1000,
        logging_dir="hopper_test2/ppo3/",
        hidden_size=400,
        num_hidden_layers=2,
        reward_scale=1.0,
        discount=0.99,
        epsilon=0.1,
        lamb=0.95,
        off_policy_updates=10,
        critic_updates=32,
        policy_learning_rate=0.0001,
        vf_learning_rate=0.001,
        exploration_noise_std=0.5,
        num_workers=10,
        num_steps_per_epoch=5000,
        num_steps_per_eval=5000,
        num_epochs_per_eval=10,
        num_epochs=1000)

    # make sure that all the right parameters are here
    assert all([x in variant.keys() for x in ppo_variant.keys()])

    # launch the experiment using ray
    launch_local(
        ppo,
        variant,
        HopperEnv,
        num_seeds=1)
Example 5
from multiarchy.launch import launch_local
# NOTE: the sac import path below is assumed, following the pattern of the
# other multiarchy.baselines imports in these examples
from multiarchy.baselines.sac import sac, sac_variant
from gym.envs.mujoco.humanoid import HumanoidEnv

if __name__ == "__main__":

    # parameters for the learning experiment
    variant = dict(
        max_num_steps=1000000,
        logging_dir="humanoid/sac/",
        hidden_size=400,
        num_hidden_layers=2,
        reward_scale=0.01,
        discount=0.99,
        initial_alpha=0.01,
        lr=0.0003,
        tau=0.005,
        batch_size=256,
        max_path_length=1000,
        num_workers=2,
        num_warm_up_steps=10000,
        num_steps_per_epoch=1000,
        num_steps_per_eval=10000,
        num_epochs_per_eval=10,
        num_epochs=10000)

    # make sure that all the right parameters are here
    assert all([x in variant.keys() for x in sac_variant.keys()])

    # launch the experiment using ray
    launch_local(
        sac,
        variant,
        HumanoidEnv,
        num_seeds=1)
Example 6
from multiarchy.launch import launch_local
from multiarchy.baselines.ddpg import ddpg, ddpg_variant
from gym.envs.mujoco.hopper import HopperEnv

if __name__ == "__main__":

    # parameters for the learning experiment
    variant = dict(max_num_steps=1000000,
                   logging_dir="hopper_test2/ddpg/",
                   hidden_size=400,
                   num_hidden_layers=2,
                   reward_scale=1.0,
                   discount=0.99,
                   policy_learning_rate=0.0001,
                   qf_learning_rate=0.001,
                   tau=0.005,
                   exploration_noise_std=0.2,
                   batch_size=256,
                   max_path_length=500,
                   num_workers=2,
                   num_warm_up_steps=5000,
                   num_steps_per_epoch=500,
                   num_steps_per_eval=5000,
                   num_epochs_per_eval=10,
                   num_epochs=10000)

    # make sure that all the right parameters are here
    assert all([x in variant.keys() for x in ddpg_variant.keys()])

    # launch the experiment using ray
    launch_local(ddpg, variant, HopperEnv, num_seeds=1)
Example 7
"""Author: Brandon Trabucco, Copyright 2019, MIT License"""

from multiarchy.launch import launch_local
from multiarchy.baselines.policy_gradient import policy_gradient, policy_gradient_variant
from gym.envs.mujoco.hopper import HopperEnv

if __name__ == "__main__":

    # parameters for the learning experiment
    variant = dict(max_path_length=500,
                   max_num_paths=1000,
                   logging_dir="hopper_test2/pg/",
                   hidden_size=400,
                   num_hidden_layers=2,
                   reward_scale=1.0,
                   discount=0.99,
                   policy_learning_rate=0.0001,
                   exploration_noise_std=0.5,
                   num_workers=10,
                   num_steps_per_epoch=5000,
                   num_steps_per_eval=5000,
                   num_epochs_per_eval=10,
                   num_epochs=1000)

    # make sure that all the right parameters are here
    assert all([x in variant.keys() for x in policy_gradient_variant.keys()])

    # launch the experiment using ray
    launch_local(policy_gradient, variant, HopperEnv, num_seeds=1)
Example 8
from multiarchy.launch import launch_local
# NOTE: assumed import path, following the pattern of the other baselines imports
from multiarchy.baselines.sac import sac, sac_variant
from gym.envs.mujoco.hopper import HopperEnv

if __name__ == "__main__":

    # parameters for the learning experiment
    variant = dict(max_num_steps=1000000,
                   logging_dir="hopper/sac/",
                   hidden_size=256,
                   num_hidden_layers=2,
                   reward_scale=1.0,
                   discount=0.99,
                   initial_alpha=0.005,
                   policy_learning_rate=0.0003,
                   qf_learning_rate=0.0003,
                   tau=0.005,
                   batch_size=256,
                   max_path_length=1000,
                   num_workers=10,
                   num_warm_up_steps=1000,
                   num_steps_per_epoch=1000,
                   num_steps_per_eval=50000,
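                   # presumably the number of environment steps collected per
                   # gradient update (meaning assumed from the parameter name)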
                   num_steps_per_gradient=1,
                   num_epochs_per_eval=10,
                   num_epochs=10000)

    # make sure that all the right parameters are here
    assert all([x in variant.keys() for x in sac_variant.keys()])

    # launch the experiment using ray
    launch_local(sac, variant, HopperEnv, num_seeds=5)