Example #1
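Builds a DDPG instance from a generated config dictionary (generate_config / init_from_config), stores one dummy transition, and runs a single update. The snippets in this section appear to come from a pytest suite for what looks like machin's DDPG, and assume module-level imports such as import torch as t, from torch import nn, the DDPG class, the test Actor/Critic models, and smw (a static module wrapper).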
    def test_config_init(self, train_config):
        c = train_config
        config = DDPG.generate_config({})
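        # the four models correspond to actor, actor_target, critic, critic_target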
        config["frame_config"]["models"] = ["Actor", "Actor", "Critic", "Critic"]
        config["frame_config"]["model_kwargs"] = [
            {
                "state_dim": c.observe_dim,
                "action_dim": c.action_dim,
                "action_range": c.action_range,
            }
        ] * 2 + [{"state_dim": c.observe_dim, "action_dim": c.action_dim}] * 2
        ddpg = DDPG.init_from_config(config)

        old_state = state = t.zeros([1, c.observe_dim], dtype=t.float32)
        action = t.zeros([1, c.action_dim], dtype=t.float32)
        ddpg.store_transition(
            {
                "state": {"state": old_state},
                "action": {"action": action},
                "next_state": {"state": state},
                "reward": 0,
                "terminal": False,
            }
        )
        ddpg.update()
Example #2
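A fixture that builds DDPG directly from an actor, a target actor, a critic, and a target critic, each wrapped with smw and parameterized by device and dtype. A usage sketch follows the fixture.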
 def ddpg(self, train_config, device, dtype):
     c = train_config
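     # smw wraps each network with its input and output device (both `device` here)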
     actor = smw(
         Actor(c.observe_dim, c.action_dim, c.action_range).type(dtype).to(device),
         device,
         device,
     )
     actor_t = smw(
         Actor(c.observe_dim, c.action_dim, c.action_range).type(dtype).to(device),
         device,
         device,
     )
     critic = smw(
         Critic(c.observe_dim, c.action_dim).type(dtype).to(device), device, device
     )
     critic_t = smw(
         Critic(c.observe_dim, c.action_dim).type(dtype).to(device), device, device
     )
     ddpg = DDPG(
         actor,
         actor_t,
         critic,
         critic_t,
         t.optim.Adam,
         nn.MSELoss(reduction="sum"),
         replay_device="cpu",
         replay_size=c.replay_size,
     )
     return ddpg
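A minimal sketch of how such a fixture might be driven in a smoke test, reusing the transition format from Example #1. The act_with_noise call, its noise parameters, and the step count are illustrative assumptions about the framework's API, not part of the original tests.

import torch as t

def smoke_test(ddpg, c, steps=64):
    # Fake environment interaction with zero tensors, as in Example #1.
    state = t.zeros([1, c.observe_dim], dtype=t.float32)
    for _ in range(steps):
        with t.no_grad():
            # Noise parameters here are illustrative, not from the tests.
            action = ddpg.act_with_noise(
                {"state": state}, noise_param=(0.0, 0.2), mode="normal"
            )
        ddpg.store_transition(
            {
                "state": {"state": state},
                "action": {"action": action},
                "next_state": {"state": state},
                "reward": 0.0,
                "terminal": False,
            }
        )
    # A single update performs one critic/actor step and soft-updates targets.
    ddpg.update()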
Example #3
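The same fixture with a discrete-action actor; as its comment notes, it only exercises the API and is not used for training.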
 def ddpg_disc(self, train_config, device, dtype):
     # not used for training, only used for testing apis
     c = train_config
     actor = smw(
         ActorDiscrete(c.observe_dim, c.action_dim).type(dtype).to(device),
         device,
         device,
     )
     actor_t = smw(
         ActorDiscrete(c.observe_dim, c.action_dim).type(dtype).to(device),
         device,
         device,
     )
     critic = smw(
         Critic(c.observe_dim, c.action_dim).type(dtype).to(device), device, device
     )
     critic_t = smw(
         Critic(c.observe_dim, c.action_dim).type(dtype).to(device), device, device
     )
     ddpg = DDPG(
         actor,
         actor_t,
         critic,
         critic_t,
         t.optim.Adam,
         nn.MSELoss(reduction="sum"),
         replay_device="cpu",
         replay_size=c.replay_size,
     )
     return ddpg
Example #4
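A fixture that turns on DDPG's visualization options, directing output to a numbered temporary directory created from pytest's tmpdir.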
 def ddpg_vis(self, train_config, device, dtype, tmpdir):
     # not used for training, only used for testing apis
     c = train_config
     tmp_dir = tmpdir.make_numbered_dir()
     actor = smw(
         Actor(c.observe_dim, c.action_dim, c.action_range).type(dtype).to(device),
         device,
         device,
     )
     actor_t = smw(
         Actor(c.observe_dim, c.action_dim, c.action_range).type(dtype).to(device),
         device,
         device,
     )
     critic = smw(
         Critic(c.observe_dim, c.action_dim).type(dtype).to(device), device, device
     )
     critic_t = smw(
         Critic(c.observe_dim, c.action_dim).type(dtype).to(device), device, device
     )
     ddpg = DDPG(
         actor,
         actor_t,
         critic,
         critic_t,
         t.optim.Adam,
         nn.MSELoss(reduction="sum"),
         replay_device="cpu",
         replay_size=c.replay_size,
         visualize=True,
         visualize_dir=str(tmp_dir),
     )
     return ddpg
Example #5
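A fixture exercising the learning-rate scheduler arguments. It assumes module-level gen_learning_rate_func, LambdaLR (from torch.optim.lr_scheduler), and a logger.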
 def ddpg_lr(self, train_config, device, dtype):
     # not used for training, only used for testing apis
     c = train_config
     actor = smw(
         ActorDiscrete(c.observe_dim, c.action_dim).type(dtype).to(device),
         device,
         device,
     )
     actor_t = smw(
         ActorDiscrete(c.observe_dim, c.action_dim).type(dtype).to(device),
         device,
         device,
     )
     critic = smw(
         Critic(c.observe_dim, c.action_dim).type(dtype).to(device), device, device
     )
     critic_t = smw(
         Critic(c.observe_dim, c.action_dim).type(dtype).to(device), device, device
     )
     lr_func = gen_learning_rate_func([(0, 1e-3), (200000, 3e-4)], logger=logger)
     with pytest.raises(TypeError, match="missing .+ positional argument"):
         _ = DDPG(
             actor,
             actor_t,
             critic,
             critic_t,
             t.optim.Adam,
             nn.MSELoss(reduction="sum"),
             replay_device="cpu",
             replay_size=c.replay_size,
             lr_scheduler=LambdaLR,
         )
     ddpg = DDPG(
         actor,
         actor_t,
         critic,
         critic_t,
         t.optim.Adam,
         nn.MSELoss(reduction="sum"),
         replay_device="cpu",
         replay_size=c.replay_size,
         lr_scheduler=LambdaLR,
         lr_scheduler_args=((lr_func,), (lr_func,)),
     )
     return ddpg
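The pytest.raises block documents why the scheduler arguments are mandatory: LambdaLR requires an lr_lambda callable, so lr_scheduler_args supplies one schedule function for the actor optimizer and one for the critic optimizer.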
Example #6
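A CPU-only fixture used for the full training test; per its comment, CPU is faster for this workload.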
 def ddpg_train(self, train_config):
     c = train_config
     # cpu is faster for testing full training.
     actor = smw(Actor(c.observe_dim, c.action_dim, c.action_range), "cpu", "cpu")
     actor_t = smw(Actor(c.observe_dim, c.action_dim, c.action_range), "cpu", "cpu")
     critic = smw(Critic(c.observe_dim, c.action_dim), "cpu", "cpu")
     critic_t = smw(Critic(c.observe_dim, c.action_dim), "cpu", "cpu")
     ddpg = DDPG(
         actor,
         actor_t,
         critic,
         critic_t,
         t.optim.Adam,
         nn.MSELoss(reduction="sum"),
         replay_device="cpu",
         replay_size=c.replay_size,
     )
     return ddpg
Example #7
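An older-style variant of the direct-construction fixture; unlike Example #2, the replay buffer lives on the configured device instead of being pinned to the CPU.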
 def ddpg(self, train_config):
     c = train_config
     actor = smw(
         Actor(c.observe_dim, c.action_dim, c.action_range).to(c.device),
         c.device,
         c.device,
     )
     actor_t = smw(
         Actor(c.observe_dim, c.action_dim, c.action_range).to(c.device),
         c.device,
         c.device,
     )
     critic = smw(
         Critic(c.observe_dim, c.action_dim).to(c.device), c.device, c.device
     )
     critic_t = smw(
         Critic(c.observe_dim, c.action_dim).to(c.device), c.device, c.device
     )
     ddpg = DDPG(
         actor,
         actor_t,
         critic,
         critic_t,
         t.optim.Adam,
         nn.MSELoss(reduction="sum"),
         replay_device=c.device,
         replay_size=c.replay_size,
     )
     return ddpg
Example #8
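The older-style discrete-actor fixture, again for API testing only.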
 def ddpg_disc(self, train_config):
     # not used for training, only used for testing apis
     c = train_config
     actor = smw(
         ActorDiscrete(c.observe_dim, c.action_dim).to(c.device),
         c.device,
         c.device,
     )
     actor_t = smw(
         ActorDiscrete(c.observe_dim, c.action_dim).to(c.device),
         c.device,
         c.device,
     )
     critic = smw(
         Critic(c.observe_dim, c.action_dim).to(c.device), c.device, c.device
     )
     critic_t = smw(
         Critic(c.observe_dim, c.action_dim).to(c.device), c.device, c.device
     )
     ddpg = DDPG(
         actor,
         actor_t,
         critic,
         critic_t,
         t.optim.Adam,
         nn.MSELoss(reduction="sum"),
         replay_device=c.device,
         replay_size=c.replay_size,
     )
     return ddpg