import numpy as np
import torch
from torch import nn
from torch.optim import Adam

# soft_update, disable_gradient, TwinnedErrorFunc and
# TwinnedErrorFuncWithEncoder come from the repo's utility/network modules.


def update_target(self):
    super().update_target()
    # In addition to the base-class targets, Polyak-average the error network.
    soft_update(self.error_target, self.error, self.target_update_coef)
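# The methods in this file rely on soft_update and disable_gradient from the
# repo's utilities. A minimal sketch of the usual semantics (this
# implementation is an assumption, not the repo's verified code):
def soft_update(target, source, tau):
    # Polyak averaging: target <- tau * source + (1 - tau) * target.
    # tau=1.0 performs the hard copy used at initialization.
    with torch.no_grad():
        for t, s in zip(target.parameters(), source.parameters()):
            t.data.mul_(1.0 - tau).add_(s.data, alpha=tau)


def disable_gradient(network):
    # Freeze a target network so autograd never tracks its parameters.
    for p in network.parameters():
        p.requires_grad = False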
def __init__(self, state_shape, action_shape, device, seed, batch_size=256,
             gamma=0.99, nstep=1, replay_size=10**6, start_steps=10**4,
             lr_actor=3e-4, lr_critic=3e-4, lr_alpha=3e-4, alpha_init=1.0,
             target_update_coef=5e-3, lr_error=3e-4, tau_init=10.0,
             start_steps_is=10**4):
    super().__init__(
        state_shape, action_shape, device, seed, batch_size, gamma, nstep,
        replay_size, start_steps, lr_actor, lr_critic, lr_alpha, alpha_init,
        target_update_coef)
    assert nstep == 1, 'DisCor only supports nstep=1.'

    # Error networks estimate the cumulative Bellman error for each critic.
    self.error = TwinnedErrorFunc(
        state_shape=self.state_shape,
        action_shape=self.action_shape,
        hidden_units=[256, 256, 256],
        hidden_activation=nn.ReLU(inplace=True)
    ).to(self.device)
    self.error_target = TwinnedErrorFunc(
        state_shape=self.state_shape,
        action_shape=self.action_shape,
        hidden_units=[256, 256, 256],
        hidden_activation=nn.ReLU(inplace=True)
    ).to(self.device).eval()

    # Initialize the target as an exact copy and freeze its parameters.
    soft_update(self.error_target, self.error, 1.0)
    disable_gradient(self.error_target)

    self.optim_error = Adam(self.error.parameters(), lr=lr_error)

    # Temperatures of the importance weights, one per critic.
    self.tau1 = torch.tensor(tau_init, device=device, requires_grad=False)
    self.tau2 = torch.tensor(tau_init, device=device, requires_grad=False)
    self.start_steps_is = start_steps_is
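# Hypothetical sketch (not verified against this class) of how tau1/tau2 and
# the frozen error targets typically produce DisCor importance weights:
# transitions whose next state-action pair has a large estimated cumulative
# Bellman error are down-weighted in the critic loss.
def calc_importance_weights(self, next_states, next_actions):
    with torch.no_grad():
        next_errs1, next_errs2 = self.error_target(next_states, next_actions)
    # A softmax over the batch implements exp(-gamma * err / tau) with
    # normalization; scaling by the batch size keeps the weighted loss on
    # the same scale as an unweighted mean.
    weights1 = torch.softmax(-next_errs1 * self.gamma / self.tau1, dim=0)
    weights2 = torch.softmax(-next_errs2 * self.gamma / self.tau2, dim=0)
    return weights1 * next_errs1.size(0), weights2 * next_errs2.size(0)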
def __init__(self, state_shape, action_shape, device, seed, batch_size=256,
             gamma=0.99, nstep=1, replay_size=10**6, start_steps=10**4,
             lr_actor=3e-4, lr_critic=3e-4, lr_alpha=3e-4, alpha_init=1.0,
             target_update_coef=5e-3):
    super().__init__(state_shape, action_shape, device, seed, batch_size,
                     gamma, nstep, replay_size, start_steps)

    self.build_network()
    # Initialize the target critic as an exact copy and freeze it.
    soft_update(self.critic_target, self.critic, 1.0)
    disable_gradient(self.critic_target)

    self.optim_actor = Adam(self.actor.parameters(), lr=lr_actor)
    self.optim_critic = Adam(self.critic.parameters(), lr=lr_critic)

    # Entropy temperature alpha is optimized in log space for stability.
    self.alpha = alpha_init
    self.log_alpha = torch.tensor(
        np.log(self.alpha), device=device, requires_grad=True)
    self.optim_alpha = torch.optim.Adam([self.log_alpha], lr=lr_alpha)
    # Standard SAC heuristic: target entropy is minus the action dimension.
    self.target_entropy = -float(action_shape[0])

    self.target_update_coef = target_update_coef
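# Hypothetical sketch of the standard SAC temperature update that log_alpha
# and target_entropy support (the loss form follows Haarnoja et al.; the
# method name and wiring here are assumptions):
def update_alpha(self, log_pis):
    # Increase alpha when the policy's entropy (-log_pis) falls below
    # target_entropy, decrease it otherwise.
    loss_alpha = -(self.log_alpha
                   * (log_pis + self.target_entropy).detach()).mean()
    self.optim_alpha.zero_grad()
    loss_alpha.backward()
    self.optim_alpha.step()
    # Keep the cached scalar in sync for the actor/critic losses.
    self.alpha = self.log_alpha.exp().item()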
def __init__(self, state_shape, action_shape, device, seed, batch_size=128,
             gamma=0.99, nstep=1, replay_size=10**6, start_steps=1000,
             lr_encoder=1e-3, lr_decoder=1e-3, lr_actor=1e-3, lr_critic=1e-3,
             lr_alpha=1e-4, alpha_init=0.1, update_freq_actor=2,
             update_freq_ae=1, update_freq_target=2, target_update_coef=0.01,
             target_update_coef_ae=0.05, lambda_rae_latents=1e-6,
             lambda_rae_weights=1e-7, lr_error=1e-3, tau_init=10.0,
             start_steps_is=10**4, update_freq_error=2):
    super().__init__(state_shape, action_shape, device, seed, batch_size,
                     gamma, nstep, replay_size, start_steps, lr_encoder,
                     lr_decoder, lr_actor, lr_critic, lr_alpha, alpha_init,
                     update_freq_actor, update_freq_ae, update_freq_target,
                     target_update_coef, target_update_coef_ae,
                     lambda_rae_latents, lambda_rae_weights)

    # Error networks share the (target) encoder with the rest of the agent;
    # only the MLP heads are specific to the error estimator.
    self.error = TwinnedErrorFuncWithEncoder(
        encoder=self.encoder,
        action_shape=self.action_shape,
        hidden_units=[1024, 1024],
        hidden_activation=nn.ReLU(inplace=True)).to(self.device)
    self.error_target = TwinnedErrorFuncWithEncoder(
        encoder=self.encoder_target,
        action_shape=self.action_shape,
        hidden_units=[1024, 1024],
        hidden_activation=nn.ReLU(inplace=True)).to(self.device).eval()

    # Copy and freeze only the error MLP head; the shared encoder has its
    # own target update.
    soft_update(self.error_target.mlp_error, self.error.mlp_error, 1.0)
    disable_gradient(self.error_target.mlp_error)

    self.optim_error = Adam(self.error.parameters(), lr=lr_error)
    self.tau1 = torch.tensor(tau_init, device=device, requires_grad=False)
    self.tau2 = torch.tensor(tau_init, device=device, requires_grad=False)
    self.start_steps_is = start_steps_is
    self.update_freq_error = update_freq_error
def update_target(self):
    soft_update(self.actor_target, self.actor, self.target_update_coef)
    soft_update(self.critic_target, self.critic, self.target_update_coef)
def __init__(self, state_shape, action_shape, device, seed, batch_size=128,
             gamma=0.99, nstep=1, replay_size=10**6, start_steps=10**4,
             lr_actor=1e-3, lr_critic=1e-3, std=0.1, target_update_coef=5e-3):
    super().__init__(state_shape, action_shape, device, seed, batch_size,
                     gamma, nstep, replay_size, start_steps)

    # Standard deviation of the Gaussian exploration noise.
    self.std = std
    self.target_update_coef = target_update_coef

    self.build_network()
    # Initialize both targets as exact copies and freeze them.
    soft_update(self.actor_target, self.actor, 1.0)
    disable_gradient(self.actor_target)
    soft_update(self.critic_target, self.critic, 1.0)
    disable_gradient(self.critic_target)

    self.optim_actor = Adam(self.actor.parameters(), lr=lr_actor)
    self.optim_critic = Adam(self.critic.parameters(), lr=lr_critic)
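# Hypothetical sketch of how the frozen targets above are typically used to
# build a one-step TD target in DDPG-style agents (the method name and the
# self.gamma attribute are assumptions based on the base-class signature):
def calc_target_q(self, rewards, dones, next_states):
    with torch.no_grad():
        next_actions = self.actor_target(next_states)
        next_qs = self.critic_target(next_states, next_actions)
    # Bootstrap only from non-terminal next states.
    return rewards + (1.0 - dones) * self.gamma * next_qs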
def update_target(self):
    # The shared encoder and the critic MLP head are averaged with separate
    # Polyak coefficients.
    soft_update(self.critic_target.encoder, self.critic.encoder,
                self.target_update_coef_ae)
    soft_update(self.critic_target.mlp_critic, self.critic.mlp_critic,
                self.target_update_coef)