예제 #1
0
 def update_target(self):
     """Polyak-average the online networks into their targets.

     Extends the parent's target update with a soft update of the
     error target network using the same update coefficient.
     """
     super().update_target()
     soft_update(self.error_target, self.error, self.target_update_coef)
예제 #2
0
    def __init__(self, state_shape, action_shape, device, seed, batch_size=256,
                 gamma=0.99, nstep=1, replay_size=10**6, start_steps=10**4,
                 lr_actor=3e-4, lr_critic=3e-4, lr_alpha=3e-4, alpha_init=1.0,
                 target_update_coef=5e-3, lr_error=3e-4, tau_init=10.0,
                 start_steps_is=10**4):
        """Set up the DisCor agent: the SAC parent plus twin error networks.

        Beyond the parent's arguments, `lr_error` is the error-model learning
        rate, `tau_init` the initial temperature of the two error heads, and
        `start_steps_is` the step count before importance sampling starts.
        """
        super().__init__(
            state_shape, action_shape, device, seed, batch_size, gamma, nstep,
            replay_size, start_steps, lr_actor, lr_critic, lr_alpha,
            alpha_init, target_update_coef)
        assert nstep == 1, 'DisCor only supports nstep=1.'

        def build_error_net():
            # Fresh activation module per network, matching the original
            # construction (no shared submodules between online and target).
            return TwinnedErrorFunc(
                state_shape=self.state_shape,
                action_shape=self.action_shape,
                hidden_units=[256, 256, 256],
                hidden_activation=nn.ReLU(inplace=True)
            ).to(self.device)

        self.error = build_error_net()
        self.error_target = build_error_net().eval()

        # Hard-copy the online error weights into the target, then freeze it:
        # the target is only ever refreshed via soft updates.
        soft_update(self.error_target, self.error, 1.0)
        disable_gradient(self.error_target)

        self.optim_error = Adam(self.error.parameters(), lr=lr_error)
        # One temperature per error head; updated manually, not by autograd.
        self.tau1 = torch.tensor(tau_init, device=device, requires_grad=False)
        self.tau2 = torch.tensor(tau_init, device=device, requires_grad=False)

        self.start_steps_is = start_steps_is
예제 #3
0
    def __init__(self,
                 state_shape,
                 action_shape,
                 device,
                 seed,
                 batch_size=256,
                 gamma=0.99,
                 nstep=1,
                 replay_size=10**6,
                 start_steps=10**4,
                 lr_actor=3e-4,
                 lr_critic=3e-4,
                 lr_alpha=3e-4,
                 alpha_init=1.0,
                 target_update_coef=5e-3):
        """Initialize the SAC agent.

        Builds the actor/critic networks, hard-copies the critic into a
        frozen target, and creates optimizers for the actor, the critic,
        and the learned entropy temperature alpha.
        """
        super().__init__(state_shape, action_shape, device, seed, batch_size,
                         gamma, nstep, replay_size, start_steps)

        self.build_network()
        # Start the target as an exact copy of the online critic and freeze
        # it; it is only ever refreshed via soft updates, never by gradients.
        soft_update(self.critic_target, self.critic, 1.0)
        disable_gradient(self.critic_target)

        self.optim_actor = Adam(self.actor.parameters(), lr=lr_actor)
        self.optim_critic = Adam(self.critic.parameters(), lr=lr_critic)

        # Optimize log(alpha) rather than alpha itself for numerical
        # stability; self.alpha caches the current (exponentiated) value.
        self.alpha = alpha_init
        self.log_alpha = torch.tensor(
            np.log(alpha_init), device=device, requires_grad=True)
        self.optim_alpha = torch.optim.Adam([self.log_alpha], lr=lr_alpha)
        # Standard SAC heuristic: target entropy = -|action dimensions|.
        self.target_entropy = -float(action_shape[0])

        self.target_update_coef = target_update_coef
예제 #4
0
    def __init__(self,
                 state_shape,
                 action_shape,
                 device,
                 seed,
                 batch_size=128,
                 gamma=0.99,
                 nstep=1,
                 replay_size=10**6,
                 start_steps=1000,
                 lr_encoder=1e-3,
                 lr_decoder=1e-3,
                 lr_actor=1e-3,
                 lr_critic=1e-3,
                 lr_alpha=1e-4,
                 alpha_init=0.1,
                 update_freq_actor=2,
                 update_freq_ae=1,
                 update_freq_target=2,
                 target_update_coef=0.01,
                 target_update_coef_ae=0.05,
                 lambda_rae_latents=1e-6,
                 lambda_rae_weights=1e-7,
                 lr_error=1e-3,
                 tau_init=10.0,
                 start_steps_is=10**4,
                 update_freq_error=2):
        """Set up the autoencoder-based DisCor agent.

        Extends the AE-SAC parent with twin error networks that share the
        (online / target) encoder, their optimizer, per-head temperatures,
        and the schedule controls for importance sampling and error updates.
        """
        super().__init__(state_shape, action_shape, device, seed, batch_size,
                         gamma, nstep, replay_size, start_steps, lr_encoder,
                         lr_decoder, lr_actor, lr_critic, lr_alpha, alpha_init,
                         update_freq_actor, update_freq_ae, update_freq_target,
                         target_update_coef, target_update_coef_ae,
                         lambda_rae_latents, lambda_rae_weights)

        def build_error_net(encoder):
            # Fresh activation per network; the encoder is shared with the
            # corresponding (online or target) critic encoder.
            return TwinnedErrorFuncWithEncoder(
                encoder=encoder,
                action_shape=self.action_shape,
                hidden_units=[1024, 1024],
                hidden_activation=nn.ReLU(inplace=True)).to(self.device)

        self.error = build_error_net(self.encoder)
        self.error_target = build_error_net(self.encoder_target).eval()

        # Hard-copy only the MLP heads (encoders already have their own
        # target-sync), then freeze the target head.
        soft_update(self.error_target.mlp_error, self.error.mlp_error, 1.0)
        disable_gradient(self.error_target.mlp_error)

        self.optim_error = Adam(self.error.parameters(), lr=lr_error)
        # One temperature per error head; updated manually, not by autograd.
        self.tau1 = torch.tensor(tau_init, device=device, requires_grad=False)
        self.tau2 = torch.tensor(tau_init, device=device, requires_grad=False)

        self.start_steps_is = start_steps_is
        self.update_freq_error = update_freq_error
예제 #5
0
 def update_target(self):
     """Soft-update both target networks toward their online counterparts.

     Actor first, then critic, both with the same Polyak coefficient.
     """
     for target, online in ((self.actor_target, self.actor),
                            (self.critic_target, self.critic)):
         soft_update(target, online, self.target_update_coef)
예제 #6
0
    def __init__(self, state_shape, action_shape, device, seed, batch_size=128,
                 gamma=0.99, nstep=1, replay_size=10**6, start_steps=10**4,
                 lr_actor=1e-3, lr_critic=1e-3, std=0.1,
                 target_update_coef=5e-3):
        """Initialize the DDPG-style agent.

        Builds actor/critic networks, hard-copies each into a frozen target,
        and creates their optimizers. `std` is the exploration-noise scale.
        """
        super().__init__(
            state_shape, action_shape, device, seed, batch_size, gamma, nstep,
            replay_size, start_steps)

        self.std = std
        self.target_update_coef = target_update_coef

        self.build_network()

        # Hard-copy online weights into each target, then freeze it; targets
        # are only ever refreshed via soft updates, never by gradients.
        for target, online in ((self.actor_target, self.actor),
                               (self.critic_target, self.critic)):
            soft_update(target, online, 1.0)
            disable_gradient(target)

        self.optim_actor = Adam(self.actor.parameters(), lr=lr_actor)
        self.optim_critic = Adam(self.critic.parameters(), lr=lr_critic)
예제 #7
0
 def update_target(self):
     """Polyak-update the target critic.

     The shared encoder and the MLP head use separate coefficients
     (`target_update_coef_ae` vs `target_update_coef`).
     """
     update_pairs = (
         (self.critic_target.encoder, self.critic.encoder,
          self.target_update_coef_ae),
         (self.critic_target.mlp_critic, self.critic.mlp_critic,
          self.target_update_coef),
     )
     for target, online, coef in update_pairs:
         soft_update(target, online, coef)