def test_single():
    """Run a Single runner on CartPole-v0, then save and reload weights.

    The runner is built from ``agnes.PPO`` and ``agnes.MLP`` with the
    configuration returned by ``test_config()``. After the run, the trainer
    writes "Test.pth" and the worker reads the same file back.
    """
    cartpole = agnes.make_env('CartPole-v0')
    single_runner = agnes.Single(
        cartpole,
        agnes.PPO,
        agnes.MLP,
        config=test_config(),
    )
    single_runner.log(agnes.log)
    single_runner.run(100)

    # Save through the trainer and load through the worker.
    checkpoint = "Test.pth"
    single_runner.trainer.save(checkpoint)
    single_runner.worker.load(checkpoint)
def test_single():
    """Run a Single runner on CartPole-v0 using ``agnes.PPO`` with ``agnes.RNN``.

    The configuration comes from ``test_config()`` and output is sent to
    ``agnes.log``.
    """
    cartpole = agnes.make_env('CartPole-v0')
    single_runner = agnes.Single(
        cartpole,
        agnes.PPO,
        agnes.RNN,
        config=test_config(),
    )
    single_runner.log(agnes.log)
    single_runner.run()
import agnes

env_name = "Ant-v2"

if __name__ == '__main__':
    # Build the environment and a Single runner, then load stored weights.
    env = agnes.make_env(env_name)
    runner = agnes.Single(env, agnes.PPO, agnes.MLP)
    runner.load("results/MuJoCo/Ant-v2_MLP/PPO/weights.pth")

    # Visualize the runner's worker: prerun(1000) first, then run().
    visualizer = agnes.common.Visualize(runner.worker, env)
    visualizer.prerun(1000).run()
import time

import agnes

env_name = "InvertedDoublePendulum-v2"  # "Swimmer-v2"  # "BreakoutNoFrameskip-v4"  #

if __name__ == '__main__':
    # Run agnes.PPORND with agnes.MLP on a vectorized environment, logging
    # to both a TensorboardLogger rooted at '.logs/' and agnes.log.
    envs = agnes.make_vec_env(env_name)
    runner = agnes.Single(envs, agnes.PPORND, agnes.MLP)
    tensorboard = agnes.TensorboardLogger('.logs/')
    runner.log(tensorboard, agnes.log)
    runner.run()

    # Afterwards, visualize the worker on a freshly made single environment.
    env = agnes.make_env(env_name)
    visualizer = agnes.common.Visualize(runner.worker, env)
    visualizer.run()
import agnes
from agnes.algos.base import _BaseAlgo
from gym.spaces import Space


class RandomAlgo(_BaseAlgo):
    """Algorithm stand-in that samples every action from the action space.

    The ``nn`` and ``observation_space`` arguments are accepted but not used.
    ``get_config`` is borrowed from ``agnes.PPO``.
    """

    get_config = agnes.PPO.get_config

    def __init__(self, nn, observation_space: Space, action_space: Space, *args, **kwargs):
        """Store ``action_space``; all other arguments are ignored."""
        super().__init__()
        self.action_space = action_space

    def __call__(self, state, done):
        """Return a sampled action followed by two ``None`` placeholders.

        ``state`` and ``done`` are not consulted.
        """
        sampled_action = self.action_space.sample()
        return sampled_action, None, None


env_name = "Ant-v2"  # "InvertedDoublePendulum-v2"  # "Swimmer-v2"  #

if __name__ == '__main__':
    env = agnes.make_env(env_name)
    runner = agnes.Single(env, RandomAlgo, agnes.MLP)

    # No training step: visualize the worker directly.
    visualizer = agnes.common.Visualize(runner.worker, env)
    visualizer.run()
) video.write(prep) self.state, _, done, _ = self.env.step(dist.sample().cpu().numpy()) if done.item(): self.hidden = None video.release() def _save_output(self, module, input, output): self.outputs.append(output[0]) def _save_gradient(self, module, grad_input, grad_output): self.gradients.append(grad_output[0]) env_name = "BreakoutNoFrameskip-v4" env = agnes.make_env(env_name, config={"frame_stack": True}) config, _ = agnes.PPO.get_config(env["env_type"]) runner = agnes.Single(env, agnes.PPO, agnes.LSTMCNN, config=config) runner.trainer.load("results/Atari-BreakoutNoFrameskip-v4-PPO-10M/Breakout.pth") VisualizeAttention(env, runner, seconds=60, layer_num=1).run() print("Done!")
def test_vec():
    """Run a Single runner on a vectorized Pendulum-v0 environment.

    Uses ``agnes.PPO`` with ``agnes.MLP`` and the configuration returned by
    ``test_config()``; output is sent to ``agnes.log``.
    """
    pendulum_envs = agnes.make_vec_env('Pendulum-v0')
    vec_runner = agnes.Single(
        pendulum_envs,
        agnes.PPO,
        agnes.MLP,
        config=test_config(),
    )
    vec_runner.log(agnes.log)
    vec_runner.run()
def test_single():
    """Run a Single runner on Pendulum-v0 while logging to a ``CsvLogger``.

    Uses ``agnes.PPO`` with ``agnes.MLP`` and the configuration returned by
    ``test_config()``.
    """
    pendulum = agnes.make_env('Pendulum-v0')
    single_runner = agnes.Single(
        pendulum,
        agnes.PPO,
        agnes.MLP,
        config=test_config(),
    )
    csv_logger = agnes.CsvLogger()
    single_runner.log(csv_logger)
    single_runner.run()
import agnes
import time

env_name = "BreakoutNoFrameskip-v4"

if __name__ == '__main__':
    # Four environments (envs_num=4) driven by agnes.PPO with agnes.CNN.
    envs = agnes.make_vec_env(env_name, envs_num=4)
    runner = agnes.Single(envs, agnes.PPO, agnes.CNN)

    # Log to a default-constructed TensorboardLogger and to agnes.log.
    tensorboard = agnes.TensorboardLogger()
    runner.log(tensorboard, agnes.log)
    runner.run()
import agnes


def test_config():
    """Return the hyperparameter dictionary passed as ``config`` to the runner.

    ``cliprange`` is a callable that scales its argument by 0.3; every other
    entry is a plain number.
    """
    return {
        "timesteps": 30000,
        "nsteps": 128,
        "nminibatches": 4,
        "gamma": 1.0,
        "lam": 1.0,
        "noptepochs": 4,
        "max_grad_norm": 40.0,
        "learning_rate": 1e-3,
        "cliprange": lambda x: 0.3 * x,
        "vf_coef": 1.0,
        "ent_coef": .005,
    }


env_name = "CartPole-v0"

if __name__ == '__main__':
    # 32 environments (envs_num=32) driven by agnes.PPO with agnes.RNN.
    envs = agnes.make_vec_env(env_name, envs_num=32)
    hyperparams = test_config()
    runner = agnes.Single(envs, agnes.PPO, agnes.RNN, config=hyperparams)
    runner.log(agnes.log)
    runner.run()
import time

import agnes

env_name = "BreakoutNoFrameskip-v4"

if __name__ == '__main__':
    # Four frame-stacked environments driven by agnes.PPO with agnes.LSTMCNN.
    env = agnes.make_vec_env(env_name, envs_num=4, config={"frame_stack": True})
    runner = agnes.Single(env, agnes.PPO, agnes.LSTMCNN)

    # Disabled in the original script: loading existing weights before the run.
    # runner.worker.load("examples/distributed_rnn/Breakout.pth")
    # runner.trainer.load("examples/distributed_rnn/Breakout.pth")

    # Log to both Tensorboard and CSV under ".logs/".
    tensorboard = agnes.TensorboardLogger(".logs/")
    csv_logger = agnes.CsvLogger(".logs/")
    runner.log(tensorboard, csv_logger)

    # Register "temp.pth" with save_every, run, then write "final.pth".
    save_interval = int(1e6)
    runner.save_every("temp.pth", save_interval)
    runner.run()
    runner.save("final.pth")