def __init__(self, config):
    Base_Agent.__init__(self, config)
    self.memory = Replay_Buffer(self.hyperparameters["buffer_size"],
                                self.hyperparameters["batch_size"], config.seed)
    self.q_network_local = self.create_NN(input_dim=self.state_size, output_dim=self.action_size)
    self.q_network_optimizer = optim.Adam(self.q_network_local.parameters(),
                                          lr=self.hyperparameters["learning_rate"])
    self.exploration_strategy = Epsilon_Greedy_Exploration(config)
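# A minimal, self-contained sketch of how an epsilon-greedy strategy could pick actions
# from the local Q-network set up above. This is NOT the repo's Epsilon_Greedy_Exploration
# class; the function name and arguments are illustrative assumptions.
import random
import torch

def epsilon_greedy_action(q_network, state, action_size, epsilon):
    """Return a random action with probability epsilon, otherwise the greedy action."""
    if random.random() < epsilon:
        return random.randint(0, action_size - 1)
    with torch.no_grad():
        q_values = q_network(state.unsqueeze(0))  # add a batch dimension
    return int(torch.argmax(q_values, dim=1).item())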
def __init__(self, config):
    Base_Agent.__init__(self, config)
    assert self.action_types == "DISCRETE", "Action types must be discrete. Use SAC instead for continuous actions"
    assert self.config.hyperparameters["Actor"]["final_layer_activation"] == "Softmax", "Final actor layer must be softmax"
    self.hyperparameters = config.hyperparameters
    # Twin critics (and their targets) output one Q-value per discrete action
    self.critic_local = self.create_NN(input_dim=self.state_size, output_dim=self.action_size, key_to_use="Critic")
    self.critic_local_2 = self.create_NN(input_dim=self.state_size, output_dim=self.action_size,
                                         key_to_use="Critic", override_seed=self.config.seed + 1)
    self.critic_optimizer = torch.optim.Adam(self.critic_local.parameters(),
                                             lr=self.hyperparameters["Critic"]["learning_rate"], eps=1e-4)
    self.critic_optimizer_2 = torch.optim.Adam(self.critic_local_2.parameters(),
                                               lr=self.hyperparameters["Critic"]["learning_rate"], eps=1e-4)
    self.critic_target = self.create_NN(input_dim=self.state_size, output_dim=self.action_size, key_to_use="Critic")
    self.critic_target_2 = self.create_NN(input_dim=self.state_size, output_dim=self.action_size, key_to_use="Critic")
    Base_Agent.copy_model_over(self.critic_local, self.critic_target)
    Base_Agent.copy_model_over(self.critic_local_2, self.critic_target_2)
    self.memory = Replay_Buffer(self.hyperparameters["Critic"]["buffer_size"],
                                self.hyperparameters["batch_size"], self.config.seed)
    self.actor_local = self.create_NN(input_dim=self.state_size, output_dim=self.action_size, key_to_use="Actor")
    self.actor_optimizer = torch.optim.Adam(self.actor_local.parameters(),
                                            lr=self.hyperparameters["Actor"]["learning_rate"], eps=1e-4)
    self.automatic_entropy_tuning = self.hyperparameters["automatically_tune_entropy_hyperparameter"]
    if self.automatic_entropy_tuning:
        # Target entropy is set to 98% of the maximum possible entropy (that of a uniform policy)
        self.target_entropy = -np.log((1.0 / self.action_size)) * 0.98
        self.log_alpha = torch.zeros(1, requires_grad=True, device=self.device)
        self.alpha = self.log_alpha.exp()
        self.alpha_optim = torch.optim.Adam([self.log_alpha],
                                            lr=self.hyperparameters["Actor"]["learning_rate"], eps=1e-4)
    else:
        self.alpha = self.hyperparameters["entropy_term_weight"]
    assert not self.hyperparameters["add_extra_noise"], \
        "There is no add extra noise option for the discrete version of SAC at the moment"
    self.add_extra_noise = False
    self.do_evaluation_iterations = self.hyperparameters["do_evaluation_iterations"]
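# A standalone sketch of the automatic entropy tuning step implied by log_alpha and
# target_entropy above, assuming `probs` and `log_probs` are the action probabilities and
# log-probabilities produced by the softmax actor. The loss follows the standard SAC
# temperature objective; the function and variable names are illustrative assumptions,
# not the agent's actual methods.
import torch

def temperature_loss(log_alpha, probs, log_probs, target_entropy):
    """Loss for log_alpha: nudges the policy's expected entropy towards target_entropy."""
    # Expected log-probability under the current discrete policy, detached from the actor graph
    expected_log_prob = (probs * log_probs).sum(dim=1).detach()
    return -(log_alpha * (expected_log_prob + target_entropy)).mean()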
def __init__(self, buffer_size, batch_size, HER_sample_proportion):
    # Separate replay buffer for hindsight (HER) experiences; self.config is expected to be
    # provided by the agent class this mixin is combined with
    self.HER_memory = Replay_Buffer(buffer_size, batch_size, self.config.seed)
    # Split each training batch between ordinary experiences and HER experiences
    self.ordinary_buffer_batch_size = int(batch_size * (1.0 - HER_sample_proportion))
    self.HER_buffer_batch_size = batch_size - self.ordinary_buffer_batch_size
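# A minimal sketch of how the split batch sizes computed above could be used to draw a
# mixed batch, assuming both buffers expose a `sample(num_experiences)` method returning
# a tuple of tensors in the same field order. The helper name and that signature are
# assumptions, not part of the repo.
import torch

def sample_mixed_batch(ordinary_buffer, her_buffer, ordinary_batch_size, her_batch_size):
    """Draw experiences from both buffers and concatenate them field by field."""
    ordinary = ordinary_buffer.sample(ordinary_batch_size)
    her = her_buffer.sample(her_batch_size)
    return tuple(torch.cat([o, h], dim=0) for o, h in zip(ordinary, her))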
def __init__(self, config):
    Base_Agent.__init__(self, config)
    assert self.action_types == "CONTINUOUS", "Action types must be continuous. Use SAC Discrete instead for discrete actions"
    assert self.config.hyperparameters["Actor"]["final_layer_activation"] != "Softmax", "Final actor layer must not be softmax"
    self.hyperparameters = config.hyperparameters
    # Twin critics (and their targets) take a (state, action) pair and output a single Q-value
    self.critic_local = self.create_NN(input_dim=self.state_size + self.action_size, output_dim=1, key_to_use="Critic")
    self.critic_local_2 = self.create_NN(input_dim=self.state_size + self.action_size, output_dim=1,
                                         key_to_use="Critic", override_seed=self.config.seed + 1)
    self.critic_optimizer = torch.optim.Adam(self.critic_local.parameters(),
                                             lr=self.hyperparameters["Critic"]["learning_rate"], eps=1e-4)
    self.critic_optimizer_2 = torch.optim.Adam(self.critic_local_2.parameters(),
                                               lr=self.hyperparameters["Critic"]["learning_rate"], eps=1e-4)
    self.critic_target = self.create_NN(input_dim=self.state_size + self.action_size, output_dim=1, key_to_use="Critic")
    self.critic_target_2 = self.create_NN(input_dim=self.state_size + self.action_size, output_dim=1, key_to_use="Critic")
    Base_Agent.copy_model_over(self.critic_local, self.critic_target)
    Base_Agent.copy_model_over(self.critic_local_2, self.critic_target_2)
    self.memory = Replay_Buffer(self.hyperparameters["Critic"]["buffer_size"],
                                self.hyperparameters["batch_size"], self.config.seed)
    # Actor outputs a mean and a log standard deviation for every action dimension
    self.actor_local = self.create_NN(input_dim=self.state_size, output_dim=self.action_size * 2, key_to_use="Actor")
    self.actor_optimizer = torch.optim.Adam(self.actor_local.parameters(),
                                            lr=self.hyperparameters["Actor"]["learning_rate"], eps=1e-4)
    self.automatic_entropy_tuning = self.hyperparameters["automatically_tune_entropy_hyperparameter"]
    if self.automatic_entropy_tuning:
        # Heuristic value from the SAC paper: target entropy is minus the action dimensionality
        self.target_entropy = -torch.prod(torch.Tensor(self.environment.action_space.shape).to(self.device)).item()
        self.log_alpha = torch.zeros(1, requires_grad=True, device=self.device)
        self.alpha = self.log_alpha.exp()
        self.alpha_optim = torch.optim.Adam([self.log_alpha],
                                            lr=self.hyperparameters["Actor"]["learning_rate"], eps=1e-4)
    else:
        self.alpha = self.hyperparameters["entropy_term_weight"]
    self.add_extra_noise = self.hyperparameters["add_extra_noise"]
    if self.add_extra_noise:
        self.noise = OU_Noise(self.action_size, self.config.seed, self.hyperparameters["mu"],
                              self.hyperparameters["theta"], self.hyperparameters["sigma"])
    self.do_evaluation_iterations = self.hyperparameters["do_evaluation_iterations"]
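# A standalone sketch of why the actor above outputs 2 * action_size values: they are read
# as the mean and log standard deviation of a tanh-squashed Gaussian policy, sampled with
# the reparameterization trick. Clamp bounds and names here are assumptions, not the repo's code.
import torch

def sample_squashed_gaussian(actor_output, action_size, log_std_min=-20.0, log_std_max=2.0):
    """Split actor output into (mean, log_std), sample with reparameterization, squash with tanh."""
    mean, log_std = actor_output[:, :action_size], actor_output[:, action_size:]
    std = log_std.clamp(log_std_min, log_std_max).exp()
    normal = torch.distributions.Normal(mean, std)
    pre_tanh = normal.rsample()            # reparameterized sample, keeps gradients
    action = torch.tanh(pre_tanh)          # squash into (-1, 1)
    # Log-probability with the change-of-variables correction for the tanh squashing
    log_prob = normal.log_prob(pre_tanh) - torch.log(1 - action.pow(2) + 1e-6)
    return action, log_prob.sum(dim=1, keepdim=True)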