import time
import agnes

# Environment id used by this script. Other ids the author left commented
# out next to it: "Swimmer-v2", "BreakoutNoFrameskip-v4".
env_name = "InvertedDoublePendulum-v2"


def main():
    """Run an ``agnes.Single`` runner on ``env_name``, then run the visualizer.

    The runner is built from ``agnes.PPORND`` and ``agnes.MLP`` and logs to
    both a ``TensorboardLogger`` rooted at ``.logs/`` and ``agnes.log``.
    """
    vec_env = agnes.make_vec_env(env_name)
    runner = agnes.Single(vec_env, agnes.PPORND, agnes.MLP)

    tensorboard = agnes.TensorboardLogger('.logs/')
    runner.log(tensorboard, agnes.log)
    runner.run()

    # The visualizer gets its own environment, created with make_env rather
    # than make_vec_env, together with the runner's worker.
    single_env = agnes.make_env(env_name)
    visualizer = agnes.common.Visualize(runner.worker, single_env)
    visualizer.run()


if __name__ == '__main__':
    main()
import agnes


def test_config():
    """Return the settings dict passed as ``config`` to the runner below.

    ``learning_rate`` and ``cliprange`` are callables that scale a base
    value (2.5e-4 and 0.1 respectively) by their single argument ``x``.
    """
    return {
        "timesteps": 30000,
        "nsteps": 128,
        "nminibatches": 4,
        "gamma": 1.0,
        "lam": 0.95,
        "noptepochs": 4,
        "max_grad_norm": 2.0,
        "learning_rate": lambda x: 2.5e-4 * x,
        "cliprange": lambda x: 0.1 * x,
        "vf_coef": 0.5,
        "ent_coef": .01,
    }


if __name__ == '__main__':
    # Two 'CartPole-v0' environments driven by a DistributedMPI runner.
    env = agnes.make_vec_env('CartPole-v0', envs_num=2)
    settings = test_config()
    runner = agnes.DistributedMPI(env, agnes.PPO, agnes.MLP, config=settings)
    runner.log(agnes.log)
    runner.run()
import agnes
import torch
import time

# Environment id used by this script.
env_name3 = "Walker2d-v2"

if __name__ == '__main__':
    # Eight environments driven by a Distributed runner (PPO + MLP).
    vec_envs = agnes.make_vec_env(env_name3, envs_num=8)
    runner = agnes.Distributed(vec_envs, agnes.PPO, agnes.MLP)

    runner.log(
        agnes.log,
        agnes.TensorboardLogger(),
    )
    runner.run()

    # Explicitly drop the runner reference once run() has returned.
    del runner
def test_vec():
    """Run an ``agnes.Single`` PPO/MLP runner on 'Pendulum-v0'.

    The runner is configured with the dict returned by ``test_config()``
    and logs through ``agnes.log``.
    """
    pendulum_envs = agnes.make_vec_env('Pendulum-v0')
    settings = test_config()

    runner = agnes.Single(pendulum_envs, agnes.PPO, agnes.MLP, config=settings)
    runner.log(agnes.log)
    runner.run()
import agnes
import time

# Environment id used by this script.
env_name = "BreakoutNoFrameskip-v4"


def main():
    """Run an ``agnes.Single`` PPO/CNN runner on four ``env_name`` environments.

    Logging goes to a default-constructed ``TensorboardLogger`` and to
    ``agnes.log``.
    """
    vec_envs = agnes.make_vec_env(env_name, envs_num=4)
    runner = agnes.Single(vec_envs, agnes.PPO, agnes.CNN)

    tensorboard = agnes.TensorboardLogger()
    runner.log(tensorboard, agnes.log)
    runner.run()


if __name__ == '__main__':
    main()
import time
import agnes

# Environment id used by this script.
env_name = "BreakoutNoFrameskip-v4"


def main():
    """Run an ``agnes.Single`` PPO/LSTMCNN runner and save its state.

    Four ``env_name`` environments are created with ``frame_stack`` enabled
    in their config. The runner logs to TensorBoard and CSV under ``.logs/``,
    registers ``save_every("temp.pth", int(1e6))``, and writes ``final.pth``
    after ``run()`` returns.
    """
    env_config = {"frame_stack": True}
    vec_env = agnes.make_vec_env(env_name, envs_num=4, config=env_config)
    runner = agnes.Single(vec_env, agnes.PPO, agnes.LSTMCNN)

    # Checkpoint loading, left disabled by the author:
    # runner.worker.load("examples/distributed_rnn/Breakout.pth")
    # runner.trainer.load("examples/distributed_rnn/Breakout.pth")

    tensorboard = agnes.TensorboardLogger(".logs/")
    csv_log = agnes.CsvLogger(".logs/")
    runner.log(tensorboard, csv_log)

    runner.save_every("temp.pth", int(1e6))
    runner.run()
    runner.save("final.pth")


if __name__ == '__main__':
    main()