Example #1
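All of the snippets on this page are excerpts, so none of them carries its imports. A sketch of what they appear to assume (the machin paths follow that library's test-suite conventions; Actor and Critic are small test-local networks that are not shown here):

import pytest
import torch  # Example #6 uses the unaliased name
import torch as t  # the test snippets use this short alias
from torch import nn

from machin.frame.algorithms import DDPGPer
# smw is assumed to alias machin's static module wrapper, as in the
# machin test suite.
from machin.model.nets.base import static_module_wrapper as smw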
    def ddpg_per(self, train_config, device, dtype):
        c = train_config
        actor = smw(
            Actor(c.observe_dim, c.action_dim, c.action_range).type(dtype).to(device),
            device,
            device,
        )
        actor_t = smw(
            Actor(c.observe_dim, c.action_dim, c.action_range).type(dtype).to(device),
            device,
            device,
        )
        critic = smw(
            Critic(c.observe_dim, c.action_dim).type(dtype).to(device), device, device
        )
        critic_t = smw(
            Critic(c.observe_dim, c.action_dim).type(dtype).to(device), device, device
        )
        # The *_t networks are the target networks; DDPGPer soft-updates
        # them from the online networks during update().
        ddpg_per = DDPGPer(
            actor,
            actor_t,
            critic,
            critic_t,
            t.optim.Adam,
            nn.MSELoss(reduction="sum"),
            replay_device="cpu",
            replay_size=c.replay_size,
        )
        return ddpg_per
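A note on the repeated device arguments: assuming smw is machin's static module wrapper, the two trailing arguments declare the wrapped network's input and output devices up front, which lets the framework move batches to the right device without probing the module at call time.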
Example #2
    def ddpg_per_vis(self, train_config, device, dtype, tmpdir):
        # Not used for training; only used for testing the visualization APIs.
        c = train_config
        tmp_dir = tmpdir.make_numbered_dir()
        actor = smw(
            Actor(c.observe_dim, c.action_dim, c.action_range).type(dtype).to(device),
            device,
            device,
        )
        actor_t = smw(
            Actor(c.observe_dim, c.action_dim, c.action_range).type(dtype).to(device),
            device,
            device,
        )
        critic = smw(
            Critic(c.observe_dim, c.action_dim).type(dtype).to(device), device, device
        )
        critic_t = smw(
            Critic(c.observe_dim, c.action_dim).type(dtype).to(device), device, device
        )
        ddpg_per = DDPGPer(
            actor,
            actor_t,
            critic,
            critic_t,
            t.optim.Adam,
            nn.MSELoss(reduction="sum"),
            replay_device="cpu",
            replay_size=c.replay_size,
            visualize=True,
            visualize_dir=str(tmp_dir),
        )
        return ddpg_per
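The only difference from Example #1 is the last two arguments: with visualize=True, DDPGPer is expected to write a visualization of the update computation graph into visualize_dir on each update, and routing that into a numbered pytest temporary directory keeps test runs from littering the working tree.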
Example #3
    def test_criterion(self, train_config, device, dtype):
        c = train_config
        actor = smw(
            Actor(c.observe_dim, c.action_dim, c.action_range).type(dtype).to(device),
            device,
            device,
        )
        actor_t = smw(
            Actor(c.observe_dim, c.action_dim, c.action_range).type(dtype).to(device),
            device,
            device,
        )
        critic = smw(
            Critic(c.observe_dim, c.action_dim).type(dtype).to(device), device, device
        )
        critic_t = smw(
            Critic(c.observe_dim, c.action_dim).type(dtype).to(device), device, device
        )
        # A bare function has no `reduction` attribute, so construction
        # must fail with a descriptive error.
        with pytest.raises(
            RuntimeError, match="Criterion does not have the 'reduction' property"
        ):

            def criterion(a, b):
                return a - b

            _ = DDPGPer(
                actor,
                actor_t,
                critic,
                critic_t,
                t.optim.Adam,
                criterion,
                replay_device="cpu",
                replay_size=c.replay_size,
            )
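The check exists, presumably, because prioritized replay must weight every sample's loss by an importance-sampling factor before reduction, so DDPGPer needs to switch the criterion to per-element reduction internally; a bare function exposes no reduction attribute to switch. A built-in loss such as nn.MSELoss, as used in the other examples, satisfies the check.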
Example #4
    def ddpg_per_train(self, train_config):
        c = train_config
        # CPU is faster for testing full training.
        actor = smw(Actor(c.observe_dim, c.action_dim, c.action_range), "cpu", "cpu")
        actor_t = smw(Actor(c.observe_dim, c.action_dim, c.action_range), "cpu", "cpu")
        critic = smw(Critic(c.observe_dim, c.action_dim), "cpu", "cpu")
        critic_t = smw(Critic(c.observe_dim, c.action_dim), "cpu", "cpu")
        ddpg_per = DDPGPer(
            actor,
            actor_t,
            critic,
            critic_t,
            t.optim.Adam,
            nn.MSELoss(reduction="sum"),
            replay_device="cpu",
            replay_size=c.replay_size,
        )
        return ddpg_per
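A fixture like this only constructs the framework object. The loop below is a hedged sketch of how the returned ddpg_per is typically driven, following machin's documented DDPG workflow; env, the noise settings, and the tensor shapes are illustrative assumptions, not part of the original tests.

# Sketch: one episode of interaction followed by a single update.
# Assumes a Gym-style `env` whose observations are flat float vectors
# of length c.observe_dim.
state = t.tensor(env.reset(), dtype=t.float32).view(1, c.observe_dim)
done = False

while not done:
    old_state = state
    with t.no_grad():
        # Deterministic policy output plus Gaussian exploration noise.
        action = ddpg_per.act_with_noise(
            {"state": old_state}, noise_param=(0.0, 0.2), mode="normal"
        )
    state, reward, done, _ = env.step(action.flatten().numpy())
    state = t.tensor(state, dtype=t.float32).view(1, c.observe_dim)
    # Store a single transition; sample priorities are managed internally.
    ddpg_per.store_transition({
        "state": {"state": old_state},
        "action": {"action": action},
        "next_state": {"state": state},
        "reward": float(reward),
        "terminal": bool(done),
    })

# One prioritized update of both actor and critic (soft-updating targets).
ddpg_per.update()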
Example #5
    def ddpg_per(self, train_config):
        c = train_config
        actor = smw(
            Actor(c.observe_dim, c.action_dim, c.action_range).to(c.device),
            c.device,
            c.device,
        )
        actor_t = smw(
            Actor(c.observe_dim, c.action_dim, c.action_range).to(c.device),
            c.device,
            c.device,
        )
        critic = smw(
            Critic(c.observe_dim, c.action_dim).to(c.device), c.device, c.device
        )
        critic_t = smw(
            Critic(c.observe_dim, c.action_dim).to(c.device), c.device, c.device
        )
        ddpg_per = DDPGPer(
            actor,
            actor_t,
            critic,
            critic_t,
            t.optim.Adam,
            nn.MSELoss(reduction="sum"),
            replay_device=c.device,
            replay_size=c.replay_size,
        )
        return ddpg_per
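Unlike the other fixtures, this one places the replay buffer on c.device as well. That trades device memory (the buffer holds up to replay_size transitions) for skipping a host-to-device copy on every sampled batch.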
Example #6
    def _build_model(self):
        actor = self._build_actor()
        actor_target = self._build_actor()
        critic = self._build_critic()
        critic_target = self._build_critic()

        # Optimizer factory: binds the L2 regularization strength while
        # leaving the learning rate for DDPGPer to fill in per network.
        def optimizer(params, lr):
            return torch.optim.Adam(params, lr=lr, weight_decay=self.l2_reg)

        criterion = nn.MSELoss(reduction="sum")

        # DDPG with prioritized replay.
        self.ddpg_per = DDPGPer(
            actor,
            actor_target,
            critic,
            critic_target,
            optimizer=optimizer,
            criterion=criterion,
            batch_size=self.batch_size,
            actor_learning_rate=self.actor_learning_rate,
            critic_learning_rate=self.critic_learning_rate,
            discount=self.gamma,
            replay_size=self.replay_capacity,
        )
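Unlike the earlier examples, which pass the optimizer class t.optim.Adam directly, this snippet passes a factory. That lets an extra constructor argument (here, L2 weight decay) be bound ahead of time while DDPGPer still supplies each network's learning rate.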