Example #1
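Initializes PPO from a generated config dict: the Actor and Critic model classes and their constructor kwargs are registered under frame_config, a short dummy episode is stored, and update() is called once. train_config appears to be a test fixture, and Actor/Critic are test models defined elsewhere in the suite.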
    def test_config_init(self, train_config):
        c = train_config
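        # generate a default config and register the model classes
        # together with their constructor keyword arguments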
        config = PPO.generate_config({})
        config["frame_config"]["models"] = ["Actor", "Critic"]
        config["frame_config"]["model_kwargs"] = [
            {
                "state_dim": c.observe_dim,
                "action_num": c.action_num
            },
            {
                "state_dim": c.observe_dim
            },
        ]
        ppo = PPO.init_from_config(config)

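        # store a short dummy episode of three identical transitions,
        # then run a single update step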
        old_state = state = t.zeros([1, c.observe_dim], dtype=t.float32)
        action = t.zeros([1, 1], dtype=t.float32)
        ppo.store_episode([{
            "state": {
                "state": old_state
            },
            "action": {
                "action": action
            },
            "next_state": {
                "state": state
            },
            "reward": 0,
            "terminal": False,
        } for _ in range(3)])
        ppo.update()
Example #2
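Builds a GAIL instance on top of a PPO learner: the actor, critic, and discriminator networks are cast to the requested dtype, moved to the requested device, and wrapped with smw (presumably a static module wrapper helper) before being passed to PPO and GAIL.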
    def gail(self, train_config, device, dtype):
        # not used for training, only used for testing apis
        c = train_config
        actor = smw(
            Actor(c.observe_dim, c.action_num).type(dtype).to(device), device,
            device)
        critic = smw(
            Critic(c.observe_dim).type(dtype).to(device), device, device)
        discriminator = smw(
            Discriminator(c.observe_dim, c.action_num).type(dtype).to(device),
            device,
            device,
        )
        ppo = PPO(
            actor,
            critic,
            t.optim.Adam,
            nn.MSELoss(reduction="sum"),
            replay_device="cpu",
            replay_size=c.replay_size,
        )
        gail = GAIL(
            discriminator,
            ppo,
            t.optim.Adam,
            expert_replay_device="cpu",
            expert_replay_size=c.replay_size,
        )
        return gail
Example #3
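Constructs a plain CPU-only PPO instance for training tests, with both the models and the replay buffer kept on the CPU.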
    def ppo_train(self, train_config):
        c = train_config
        actor = smw(Actor(c.observe_dim, c.action_num), "cpu", "cpu")
        critic = smw(Critic(c.observe_dim), "cpu", "cpu")
        ppo = PPO(actor,
                  critic,
                  t.optim.Adam,
                  nn.MSELoss(reduction='sum'),
                  replay_device="cpu",
                  replay_size=c.replay_size)
        return ppo
Example #4
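Like the previous example, but the actor and critic are cast to a given dtype and moved to a given device before wrapping, so the setup can be parametrized over devices and precisions.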
    def ppo(self, train_config, device, dtype):
        c = train_config
        actor = smw(
            Actor(c.observe_dim, c.action_num).type(dtype).to(device), device,
            device)
        critic = smw(
            Critic(c.observe_dim).type(dtype).to(device), device, device)
        ppo = PPO(actor,
                  critic,
                  t.optim.Adam,
                  nn.MSELoss(reduction='sum'),
                  replay_device="cpu",
                  replay_size=c.replay_size)
        return ppo
Example #5
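Creates a PPO instance with visualize=True, writing visualization output to a numbered temporary directory obtained from the tmpdir fixture.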
    def ppo_vis(self, train_config, tmpdir):
        # not used for training, only used for testing apis
        c = train_config
        tmp_dir = tmpdir.make_numbered_dir()
        actor = smw(Actor(c.observe_dim, c.action_num)
                    .to(c.device), c.device, c.device)
        critic = smw(Critic(c.observe_dim)
                     .to(c.device), c.device, c.device)
        ppo = PPO(actor, critic,
                  t.optim.Adam,
                  nn.MSELoss(reduction='sum'),
                  replay_device=c.device,
                  replay_size=c.replay_size,
                  visualize=True,
                  visualize_dir=str(tmp_dir))
        return ppo
Example #6
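Builds a GAIL instance with a LambdaLR learning-rate scheduler. It first checks that passing lr_scheduler without lr_scheduler_args raises a TypeError, then constructs GAIL with a learning-rate function produced by gen_learning_rate_func.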
    def gail_lr(self, train_config, device, dtype):
        # not used for training, only used for testing apis
        c = train_config
        actor = smw(
            Actor(c.observe_dim, c.action_num).type(dtype).to(device), device,
            device)
        critic = smw(
            Critic(c.observe_dim).type(dtype).to(device), device, device)
        discriminator = smw(
            Discriminator(c.observe_dim, c.action_num).type(dtype).to(device),
            device,
            device,
        )
        ppo = PPO(
            actor,
            critic,
            t.optim.Adam,
            nn.MSELoss(reduction="sum"),
            replay_device="cpu",
            replay_size=c.replay_size,
        )
        lr_func = gen_learning_rate_func([(0, 1e-3), (200000, 3e-4)],
                                         logger=logger)
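        # passing a scheduler class without lr_scheduler_args should fail,
        # since LambdaLR needs its lr_lambda argument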
        with pytest.raises(TypeError, match="missing .+ positional argument"):
            _ = GAIL(
                discriminator,
                ppo,
                t.optim.Adam,
                expert_replay_device="cpu",
                expert_replay_size=c.replay_size,
                lr_scheduler=LambdaLR,
            )

        gail = GAIL(
            discriminator,
            ppo,
            t.optim.Adam,
            expert_replay_device="cpu",
            expert_replay_size=c.replay_size,
            lr_scheduler=LambdaLR,
            lr_scheduler_args=((lr_func, ), ),
        )
        return gail
Example #7
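A CPU-only GAIL setup for training tests: actor, critic, and discriminator are wrapped with smw, a PPO learner is built from the actor and critic, and GAIL combines it with the discriminator and an expert replay buffer.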
    def gail_train(self, train_config):
        c = train_config
        actor = smw(Actor(c.observe_dim, c.action_num), "cpu", "cpu")
        critic = smw(Critic(c.observe_dim), "cpu", "cpu")
        discriminator = smw(Discriminator(c.observe_dim, c.action_num), "cpu",
                            "cpu")
        ppo = PPO(
            actor,
            critic,
            t.optim.Adam,
            nn.MSELoss(reduction="sum"),
            replay_device="cpu",
            replay_size=c.replay_size,
        )
        gail = GAIL(
            discriminator,
            ppo,
            t.optim.Adam,
            expert_replay_device="cpu",
            expert_replay_size=c.replay_size,
        )
        return gail