Ejemplo n.º 1
0
def test_single():
    """Run a ``Single`` runner (PPO + MLP) on CartPole-v0 and round-trip weights.

    After the run, the trainer's state is saved to ``Test.pth`` and that
    same file is loaded back through the runner's worker.
    """
    checkpoint_path = "Test.pth"

    cartpole = agnes.make_env('CartPole-v0')
    settings = test_config()

    single_runner = agnes.Single(cartpole, agnes.PPO, agnes.MLP, config=settings)
    single_runner.log(agnes.log)
    # NOTE(review): the meaning of the positional 100 is defined by
    # ``Single.run`` (not visible here) - confirm.
    single_runner.run(100)

    # Save on the trainer side, load on the worker side.
    single_runner.trainer.save(checkpoint_path)
    single_runner.worker.load(checkpoint_path)
Ejemplo n.º 2
0
def test_single():
    """Run a ``Single`` runner (PPO + RNN) on a single CartPole-v0 environment."""
    cartpole = agnes.make_env('CartPole-v0')
    settings = test_config()

    single_runner = agnes.Single(cartpole, agnes.PPO, agnes.RNN, config=settings)
    # Send progress to the library's default ``agnes.log`` logger.
    single_runner.log(agnes.log)
    single_runner.run()
Ejemplo n.º 3
0
import agnes

# Environment whose saved policy is replayed below.
env_name = "Ant-v2"

if __name__ == '__main__':
    ant_env = agnes.make_env(env_name)
    single_runner = agnes.Single(ant_env, agnes.PPO, agnes.MLP)

    # Restore previously saved weights instead of training.
    single_runner.load("results/MuJoCo/Ant-v2_MLP/PPO/weights.pth")

    # NOTE(review): ``prerun(1000)`` presumably steps the environment before
    # rendering starts - confirm against ``agnes.common.Visualize``.
    viewer = agnes.common.Visualize(single_runner.worker, ant_env)
    prepared_viewer = viewer.prerun(1000)
    prepared_viewer.run()
Ejemplo n.º 4
0
import time

import agnes

# Environment to train on; alternatives: "Swimmer-v2", "BreakoutNoFrameskip-v4".
env_name = "InvertedDoublePendulum-v2"

if __name__ == '__main__':
    # --- Training on a vectorized environment ---
    training_envs = agnes.make_vec_env(env_name)
    single_runner = agnes.Single(training_envs, agnes.PPORND, agnes.MLP)

    # Log to TensorBoard (under '.logs/') and to the default ``agnes.log``.
    tensorboard_logger = agnes.TensorboardLogger('.logs/')
    single_runner.log(tensorboard_logger, agnes.log)
    single_runner.run()

    # --- Visualization on a fresh, non-vectorized environment ---
    display_env = agnes.make_env(env_name)
    viewer = agnes.common.Visualize(single_runner.worker, display_env)
    viewer.run()
Ejemplo n.º 5
0
import agnes
from agnes.algos.base import _BaseAlgo
from gym.spaces import Space


class RandomAlgo(_BaseAlgo):
    """Algorithm stand-in that ignores its inputs and samples random actions."""

    # Reuse PPO's configuration lookup rather than defining a separate one.
    get_config = agnes.PPO.get_config

    def __init__(self, nn,
                 observation_space: Space,
                 action_space: Space,
                 *args, **kwargs):
        """Keep only the action space; every other argument is ignored.

        Args:
            nn: Unused; accepted so the signature matches what the runner
                passes to an algorithm.
            observation_space: Unused.
            action_space: Space that actions are sampled from.
            *args: Ignored.
            **kwargs: Ignored.
        """
        super().__init__()

        self.action_space = action_space

    def __call__(self, state, done):
        """Return ``(action, None, None)`` with a freshly sampled action.

        ``state`` and ``done`` are not consulted.
        """
        random_action = self.action_space.sample()
        return random_action, None, None


# Environment to visualize; alternatives: "InvertedDoublePendulum-v2", "Swimmer-v2".
env_name = "Ant-v2"

if __name__ == '__main__':
    ant_env = agnes.make_env(env_name)

    # No training or loading step: the worker of the RandomAlgo-based runner
    # is handed directly to the visualizer.
    random_runner = agnes.Single(ant_env, RandomAlgo, agnes.MLP)

    viewer = agnes.common.Visualize(random_runner.worker, ant_env)
    viewer.run()
Ejemplo n.º 6
0
            )

            video.write(prep)

            self.state, _, done, _ = self.env.step(dist.sample().cpu().numpy())

            if done.item():
                self.hidden = None

        video.release()

    def _save_output(self, module, input, output):
        self.outputs.append(output[0])

    def _save_gradient(self, module, grad_input, grad_output):
        self.gradients.append(grad_output[0])


# Atari environment whose saved policy is inspected below.
env_name = "BreakoutNoFrameskip-v4"

# Build the environment with frame stacking enabled.
env_options = {"frame_stack": True}
env = agnes.make_env(env_name, config=env_options)

# ``get_config`` returns a pair; only the first element is used here.
env_type = env["env_type"]
config, _ = agnes.PPO.get_config(env_type)

runner = agnes.Single(env, agnes.PPO, agnes.LSTMCNN, config=config)

# Restore saved weights into the trainer before visualizing.
weights_path = "results/Atari-BreakoutNoFrameskip-v4-PPO-10M/Breakout.pth"
runner.trainer.load(weights_path)

attention_visualizer = VisualizeAttention(env, runner, seconds=60, layer_num=1)
attention_visualizer.run()

print("Done!")
Ejemplo n.º 7
0
def test_vec():
    """Run a ``Single`` runner (PPO + MLP) on a vectorized Pendulum-v0 environment."""
    pendulum_envs = agnes.make_vec_env('Pendulum-v0')
    settings = test_config()

    single_runner = agnes.Single(pendulum_envs, agnes.PPO, agnes.MLP, config=settings)
    # Send progress to the library's default ``agnes.log`` logger.
    single_runner.log(agnes.log)
    single_runner.run()
Ejemplo n.º 8
0
def test_single():
    """Run a ``Single`` runner (PPO + MLP) on Pendulum-v0, logging via ``CsvLogger``."""
    pendulum = agnes.make_env('Pendulum-v0')
    settings = test_config()

    single_runner = agnes.Single(pendulum, agnes.PPO, agnes.MLP, config=settings)

    # A CsvLogger built with its default arguments is the only logger attached.
    csv_logger = agnes.CsvLogger()
    single_runner.log(csv_logger)
    single_runner.run()
Ejemplo n.º 9
0
import agnes
import time

# Atari environment used for this training run.
env_name = "BreakoutNoFrameskip-v4"

if __name__ == '__main__':
    # Vectorized environment created with ``envs_num=4``.
    breakout_envs = agnes.make_vec_env(env_name, envs_num=4)

    single_runner = agnes.Single(breakout_envs, agnes.PPO, agnes.CNN)

    # Log to TensorBoard (default arguments) and to the default ``agnes.log``.
    tensorboard_logger = agnes.TensorboardLogger()
    single_runner.log(tensorboard_logger, agnes.log)
    single_runner.run()
Ejemplo n.º 10
0
import agnes


def test_config():
    """Return a fresh dict of hyperparameter overrides for the example runs.

    The dict is rebuilt on every call, so callers may mutate the result.
    ``cliprange`` is a callable rather than a number.
    """
    return {
        "timesteps": 30000,
        "nsteps": 128,
        "nminibatches": 4,
        "gamma": 1.0,
        "lam": 1.0,
        "noptepochs": 4,
        "max_grad_norm": 40.0,
        "learning_rate": 1e-3,
        # Scales linearly with x from a base of 0.3.
        # NOTE(review): x is presumably a training-progress fraction - confirm.
        "cliprange": lambda x: 0.3 * x,
        "vf_coef": 1.0,
        "ent_coef": .005,
    }


# Environment used for this training run.
env_name = "CartPole-v0"

if __name__ == '__main__':
    # Vectorized environment created with ``envs_num=32``.
    cartpole_envs = agnes.make_vec_env(env_name, envs_num=32)
    settings = test_config()

    single_runner = agnes.Single(cartpole_envs, agnes.PPO, agnes.RNN, config=settings)
    # Send progress to the library's default ``agnes.log`` logger.
    single_runner.log(agnes.log)
    single_runner.run()
Ejemplo n.º 11
0
import time

import agnes

# Atari environment used for this training run.
env_name = "BreakoutNoFrameskip-v4"

if __name__ == '__main__':
    # Vectorized environment (``envs_num=4``) with frame stacking enabled.
    env_options = {"frame_stack": True}
    breakout_envs = agnes.make_vec_env(env_name, envs_num=4, config=env_options)

    single_runner = agnes.Single(breakout_envs, agnes.PPO, agnes.LSTMCNN)

    # Optional warm start (disabled): load the same checkpoint into both
    # the worker and the trainer before running.
    # single_runner.worker.load("examples/distributed_rnn/Breakout.pth")
    # single_runner.trainer.load("examples/distributed_rnn/Breakout.pth")

    # Log to TensorBoard and CSV, both under ".logs/".
    tensorboard_logger = agnes.TensorboardLogger(".logs/")
    csv_logger = agnes.CsvLogger(".logs/")
    single_runner.log(tensorboard_logger, csv_logger)

    # Periodic checkpoint to "temp.pth"; the interval argument is int(1e6).
    checkpoint_interval = int(1e6)
    single_runner.save_every("temp.pth", checkpoint_interval)

    single_runner.run()
    single_runner.save("final.pth")