from multiarchy.launch import launch_local
# module path inferred by analogy with the other baseline scripts
from multiarchy.baselines.hierarchy_sac import hierarchy_sac, hierarchy_sac_variant
from gym.envs.mujoco.half_cheetah import HalfCheetahEnv


if __name__ == "__main__":

    # parameters for the learning experiment
    variant = dict(
        max_num_steps=1000000,
        logging_dir="half_cheetah/hierarchy_sac/",
        num_hierarchy_levels=2,
        time_skip=10,
        hidden_size=256,
        num_hidden_layers=2,
        reward_scale=1.0,
        discount=0.99,
        initial_alpha=1.0,
        policy_learning_rate=0.0003,
        qf_learning_rate=0.0003,
        tau=0.005,
        batch_size=256,
        max_path_length=1000,
        num_workers=10,
        num_warm_up_steps=100000,
        num_steps_per_epoch=1000,
        num_steps_per_eval=10000,
        num_epochs_per_eval=10,
        num_epochs=10000)

    # make sure that all the right parameters are here
    assert all([x in variant.keys() for x in hierarchy_sac_variant.keys()])

    # launch the experiment using ray
    launch_local(
        hierarchy_sac,
        variant,
        HalfCheetahEnv,
        num_seeds=3)
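
# Aside (assumed semantics, not confirmed by multiarchy's docs): with
# num_hierarchy_levels=2 and time_skip=10, the upper-level policy appears to
# act once every 10 environment steps, so a max_path_length=1000 episode
# would contain 1000 / 10 = 100 upper-level decisions and 1000 lower-level
# actions.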
from multiarchy.launch import launch_local
from multiarchy.baselines.ddpg import ddpg, ddpg_variant
from multiarchy.envs.pointmass_env import PointmassEnv


if __name__ == "__main__":

    # parameters for the learning experiment
    variant = dict(
        max_num_steps=10000,
        logging_dir="pointmass/ddpg/",
        hidden_size=400,
        num_hidden_layers=2,
        reward_scale=1.0,
        discount=0.99,
        policy_learning_rate=0.0003,
        qf_learning_rate=0.0003,
        tau=0.005,
        exploration_noise_std=0.1,
        batch_size=256,
        max_path_length=10,
        num_workers=2,
        num_warm_up_steps=100,
        num_steps_per_epoch=10,
        num_steps_per_eval=100,
        num_epochs_per_eval=10,
        num_epochs=10000)

    # make sure that all the right parameters are here
    assert all([x in variant.keys() for x in ddpg_variant.keys()])

    # launch the experiment using ray
    launch_local(
        ddpg,
        variant,
        PointmassEnv,
        num_seeds=2)
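
# Aside: the tiny horizon (max_path_length=10) and step budget
# (max_num_steps=10000) suggest this pointmass script is a quick smoke test
# of the DDPG pipeline rather than a full benchmark run.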
from multiarchy.launch import launch_local
from multiarchy.baselines.ppo import ppo, ppo_variant
from gym.envs.mujoco.half_cheetah import HalfCheetahEnv


if __name__ == "__main__":

    # parameters for the learning experiment
    variant = dict(
        max_path_length=1000,
        max_num_paths=1000,
        logging_dir="half_cheetah/ppo/",
        hidden_size=400,
        num_hidden_layers=2,
        reward_scale=1.0,
        discount=0.99,
        epsilon=0.1,
        lamb=0.95,
        off_policy_updates=10,
        critic_updates=32,
        policy_learning_rate=0.0001,
        vf_learning_rate=0.001,
        num_workers=10,
        num_steps_per_epoch=10000,
        num_steps_per_eval=10000,
        num_epochs_per_eval=1,
        num_epochs=1000)

    # make sure that all the right parameters are here
    assert all([x in variant.keys() for x in ppo_variant.keys()])

    # launch the experiment using ray
    launch_local(
        ppo,
        variant,
        HalfCheetahEnv,
        num_seeds=3)
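
# Aside: a minimal sketch of the clipped surrogate that epsilon=0.1 controls
# in PPO. This is the standard objective from Schulman et al. (2017), written
# here for reference; it is not multiarchy's implementation, and the function
# name is hypothetical.
import numpy as np

def ppo_clip_objective(ratio, advantage, epsilon=0.1):
    # ratio = pi_new(a|s) / pi_old(a|s); the advantage typically comes from
    # GAE, which the lamb=0.95 parameter above would control
    clipped = np.clip(ratio, 1.0 - epsilon, 1.0 + epsilon)
    return np.minimum(ratio * advantage, clipped * advantage)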
from multiarchy.launch import launch_local
from multiarchy.baselines.ppo import ppo, ppo_variant
from gym.envs.mujoco.hopper import HopperEnv


if __name__ == "__main__":

    # parameters for the learning experiment
    variant = dict(
        max_path_length=500,
        max_num_paths=1000,
        logging_dir="hopper_test2/ppo3/",
        hidden_size=400,
        num_hidden_layers=2,
        reward_scale=1.0,
        discount=0.99,
        epsilon=0.1,
        lamb=0.95,
        off_policy_updates=10,
        critic_updates=32,
        policy_learning_rate=0.0001,
        vf_learning_rate=0.001,
        exploration_noise_std=0.5,
        num_workers=10,
        num_steps_per_epoch=5000,
        num_steps_per_eval=5000,
        num_epochs_per_eval=10,
        num_epochs=1000)

    # make sure that all the right parameters are here
    assert all([x in variant.keys() for x in ppo_variant.keys()])

    # launch the experiment using ray
    launch_local(
        ppo,
        variant,
        HopperEnv,
        num_seeds=1)
from multiarchy.launch import launch_local
# module path inferred by analogy with the hopper SAC script
from multiarchy.baselines.sac import sac, sac_variant
from gym.envs.mujoco.humanoid import HumanoidEnv


if __name__ == "__main__":

    # parameters for the learning experiment
    variant = dict(
        max_num_steps=1000000,
        logging_dir="humanoid/sac/",
        hidden_size=400,
        num_hidden_layers=2,
        reward_scale=0.01,
        discount=0.99,
        initial_alpha=0.01,
        # the original read "lr=0.0003"; expanded here to the per-network
        # keys the hopper SAC script uses, so the assert below can pass
        policy_learning_rate=0.0003,
        qf_learning_rate=0.0003,
        tau=0.005,
        batch_size=256,
        max_path_length=1000,
        num_workers=2,
        num_warm_up_steps=10000,
        num_steps_per_epoch=1000,
        num_steps_per_eval=10000,
        num_epochs_per_eval=10,
        num_epochs=10000)

    # make sure that all the right parameters are here
    assert all([x in variant.keys() for x in sac_variant.keys()])

    # launch the experiment using ray
    launch_local(
        sac,
        variant,
        HumanoidEnv,
        num_seeds=1)
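
# Aside (standard SAC, not multiarchy specifics): initial_alpha sets the
# starting entropy temperature, i.e. the weight on -log pi(a|s) in the actor
# objective E[Q(s, a) - alpha * log pi(a|s)]. The small reward_scale=0.01 and
# initial_alpha=0.01 here presumably rebalance Humanoid's large reward
# magnitudes against that entropy bonus.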
from multiarchy.launch import launch_local
from multiarchy.baselines.ddpg import ddpg, ddpg_variant
from gym.envs.mujoco.hopper import HopperEnv


if __name__ == "__main__":

    # parameters for the learning experiment
    variant = dict(
        max_num_steps=1000000,
        logging_dir="hopper_test2/ddpg/",
        hidden_size=400,
        num_hidden_layers=2,
        reward_scale=1.0,
        discount=0.99,
        policy_learning_rate=0.0001,
        qf_learning_rate=0.001,
        tau=0.005,
        exploration_noise_std=0.2,
        batch_size=256,
        max_path_length=500,
        num_workers=2,
        num_warm_up_steps=5000,
        num_steps_per_epoch=500,
        num_steps_per_eval=5000,
        num_epochs_per_eval=10,
        num_epochs=10000)

    # make sure that all the right parameters are here
    assert all([x in variant.keys() for x in ddpg_variant.keys()])

    # launch the experiment using ray
    launch_local(
        ddpg,
        variant,
        HopperEnv,
        num_seeds=1)
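
# Aside: a minimal sketch of how exploration_noise_std is conventionally used
# in DDPG (Gaussian noise added to the deterministic action); generic code,
# not multiarchy's, and the function name is hypothetical.
import numpy as np

def noisy_action(action, std=0.2, low=-1.0, high=1.0):
    # perturb the deterministic policy output, then clip to the action bounds
    noise = np.random.normal(0.0, std, np.shape(action))
    return np.clip(action + noise, low, high)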
"""Author: Brandon Trabucco, Copyright 2019, MIT License""" from multiarchy.launch import launch_local from multiarchy.baselines.policy_gradient import policy_gradient, policy_gradient_variant from gym.envs.mujoco.hopper import HopperEnv if __name__ == "__main__": # parameters for the learning experiment variant = dict(max_path_length=500, max_num_paths=1000, logging_dir="hopper_test2/pg/", hidden_size=400, num_hidden_layers=2, reward_scale=1.0, discount=0.99, policy_learning_rate=0.0001, exploration_noise_std=0.5, num_workers=10, num_steps_per_epoch=5000, num_steps_per_eval=5000, num_epochs_per_eval=10, num_epochs=1000) # make sure that all the right parameters are here assert all([x in variant.keys() for x in policy_gradient_variant.keys()]) # launch the experiment using ray launch_local(policy_gradient, variant, HopperEnv, num_seeds=1)
from multiarchy.launch import launch_local
# module path inferred by analogy with the other baseline scripts
from multiarchy.baselines.sac import sac, sac_variant
from gym.envs.mujoco.hopper import HopperEnv


if __name__ == "__main__":

    # parameters for the learning experiment
    variant = dict(
        max_num_steps=1000000,
        logging_dir="hopper/sac/",
        hidden_size=256,
        num_hidden_layers=2,
        reward_scale=1.0,
        discount=0.99,
        initial_alpha=0.005,
        policy_learning_rate=0.0003,
        qf_learning_rate=0.0003,
        tau=0.005,
        batch_size=256,
        max_path_length=1000,
        num_workers=10,
        num_warm_up_steps=1000,
        num_steps_per_epoch=1000,
        num_steps_per_eval=50000,
        num_steps_per_gradient=1,
        num_epochs_per_eval=10,
        num_epochs=10000)

    # make sure that all the right parameters are here
    assert all([x in variant.keys() for x in sac_variant.keys()])

    # launch the experiment using ray
    launch_local(
        sac,
        variant,
        HopperEnv,
        num_seeds=5)
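
# Aside: every script above repeats the same assert; a helper like this
# (hypothetical, not part of multiarchy) would name the missing keys
# instead of only failing the boolean check.
def check_variant(variant, default_variant):
    missing = [k for k in default_variant if k not in variant]
    assert not missing, "variant is missing keys: {}".format(missing)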