def __init__(self, task, exploration_mu=0, exploration_theta=0.15,
             exploration_sigma=0.2, buffer_size=100000, batch_size=64,
             gamma=0.99, tau=0.01, actor_learning_rate=0.001,
             critic_learning_rate=0.001):
    self.task = task
    self.state_size = task.state_size
    self.action_size = task.action_size
    self.action_low = task.action_low
    self.action_high = task.action_high

    # Actor (Policy) Model
    self.actor_local = Actor(self.state_size, self.action_size,
                             self.action_low, self.action_high,
                             learning_rate=actor_learning_rate)
    self.actor_target = Actor(self.state_size, self.action_size,
                              self.action_low, self.action_high,
                              learning_rate=actor_learning_rate)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size,
                               learning_rate=critic_learning_rate)
    self.critic_target = Critic(self.state_size, self.action_size,
                                learning_rate=critic_learning_rate)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.exploration_mu = exploration_mu
    self.exploration_theta = exploration_theta
    self.exploration_sigma = exploration_sigma
    self.noise = OUNoise(self.action_size, self.exploration_mu,
                         self.exploration_theta, self.exploration_sigma)

    # Replay memory
    self.buffer_size = buffer_size
    self.batch_size = batch_size
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

    # Algorithm parameters
    self.gamma = gamma
    self.tau = tau
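# Every constructor in this section builds an OUNoise(size, mu, theta, sigma)
# exploration process. A minimal sketch of an Ornstein-Uhlenbeck process with
# that call signature is given below; only the signature is taken from the
# snippets above, the implementation details are an assumption.
import numpy as np

class OUNoise:
    """Ornstein-Uhlenbeck process: temporally correlated noise for continuous actions."""

    def __init__(self, size, mu, theta, sigma):
        self.mu = mu * np.ones(size)
        self.theta = theta   # pull strength toward the mean
        self.sigma = sigma   # scale of the random perturbation
        self.reset()

    def reset(self):
        """Reset the internal state to the mean."""
        self.state = np.copy(self.mu)

    def sample(self):
        """Advance the process one step and return the new state as a noise sample."""
        dx = self.theta * (self.mu - self.state) + self.sigma * np.random.randn(len(self.state))
        self.state = self.state + dx
        return self.state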
def __init__(self, actor_model, tgt_actor_model, critic_model, tgt_critic_model,
             action_limits, actor_lr=1e-4, critic_lr=1e-3, critic_decay=1e-2,
             tau=1e-3, gamma=0.99, process=None, rb_size=1e6, minibatch_size=64,
             warmup_episodes=0, episodes_trained=0, train_scores=None,
             test_scores=None, best_train_score=-np.inf):
    # Changed this to use a generic env instead of Task
    super().__init__(warmup_episodes, episodes_trained, train_scores,
                     test_scores, best_train_score)

    # Local and target actor networks; the target starts as a copy of the local weights
    self.actor = Actor(actor_model, critic_model, lr=actor_lr)
    self.tgt_actor = Actor(tgt_actor_model, tgt_critic_model, lr=actor_lr)
    self.tgt_actor.set_weights(self.actor.get_weights())

    # Local and target critic networks
    self.critic = Critic(critic_model, lr=critic_lr, decay=critic_decay)
    self.tgt_critic = Critic(tgt_critic_model, lr=critic_lr, decay=critic_decay)
    self.tgt_critic.set_weights(self.critic.get_weights())

    self.action_limits = action_limits
    self.minibatch_size = minibatch_size
    self.buffer = ReplayBuffer(int(rb_size), self.minibatch_size)
    self.tau = tau
    self.gamma = gamma

    # Infer state and action dimensions from the critic's input tensors
    self.state_space = K.int_shape(critic_model.inputs[0])[1]
    self.action_space = K.int_shape(critic_model.inputs[1])[1]
    self.learning_phase = 1

    # Default to an Ornstein-Uhlenbeck process when no exploration process is given
    if process is None:
        self.process = OUNoise(size=self.action_space, theta=0.15, mu=0, sigma=0.2)
    else:
        self.process = process
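# Each agent keeps target networks that track the local networks through a soft
# update controlled by tau. A minimal sketch of that update for Keras-style
# models (get_weights/set_weights) follows; the helper name soft_update is an
# assumption, not part of the original code.
def soft_update(local_model, target_model, tau):
    """Blend target weights toward local weights: theta_target <- tau*theta_local + (1 - tau)*theta_target."""
    local_weights = local_model.get_weights()
    target_weights = target_model.get_weights()
    new_weights = [tau * lw + (1.0 - tau) * tw
                   for lw, tw in zip(local_weights, target_weights)]
    target_model.set_weights(new_weights)

# Example usage after a learning step, using the attribute names from the first snippet:
#   soft_update(self.actor_local.model, self.actor_target.model, self.tau)
#   soft_update(self.critic_local.model, self.critic_target.model, self.tau)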
def __init__(self, task):
    self.task = task
    self.state_size = task.state_size
    self.action_size = task.action_size
    self.action_low = task.action_low
    self.action_high = task.action_high

    # Actor (Policy) Model
    self.actor_local = Actor(self.state_size, self.action_size,
                             self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size,
                              self.action_low, self.action_high)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.exploration_mu = 0
    self.exploration_theta = 0.15
    self.exploration_sigma = 0.005
    self.noise = OUNoise(self.action_size, self.exploration_mu,
                         self.exploration_theta, self.exploration_sigma)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.15    # for soft update of target parameters

    # Score tracking
    self.best_score = -np.inf
    self.score = 0
def __init__(self, task):
    self.task = task
    self.state_size = task.state_size
    self.action_size = task.action_size
    self.action_low = task.action_low
    self.action_high = task.action_high

    # Actor (Policy) Model
    self.actor_local = Actor(self.state_size, self.action_size,
                             self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size,
                              self.action_low, self.action_high)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # The values below follow the DDPG paper
    # "Continuous control with deep reinforcement learning",
    # Section 7: Experiment Details.

    # Noise process
    self.exploration_mu = 0
    self.exploration_theta = 0.15
    self.exploration_sigma = 0.2
    self.noise = OUNoise(self.action_size, self.exploration_mu,
                         self.exploration_theta, self.exploration_sigma)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.001   # for soft update of target parameters

    # Tracking
    self.score = -np.inf
    self.best_score = -np.inf
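# Every constructor above also creates a ReplayBuffer(buffer_size, batch_size)
# for experience replay. A minimal sketch of such a buffer with uniform random
# sampling is given below; the add/sample interface and the experience field
# names are assumptions rather than the original code.
import random
from collections import deque, namedtuple

class ReplayBuffer:
    """Fixed-size buffer that stores experience tuples and samples them uniformly."""

    def __init__(self, buffer_size, batch_size):
        self.memory = deque(maxlen=buffer_size)  # oldest experiences are dropped first
        self.batch_size = batch_size
        self.experience = namedtuple(
            "Experience", ["state", "action", "reward", "next_state", "done"])

    def add(self, state, action, reward, next_state, done):
        """Store a new experience tuple."""
        self.memory.append(self.experience(state, action, reward, next_state, done))

    def sample(self, batch_size=None):
        """Return a uniformly sampled minibatch of experiences."""
        return random.sample(self.memory, k=batch_size or self.batch_size)

    def __len__(self):
        return len(self.memory)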
def __init__(self, env):
    self.env = env
    self.state_size = env.observation_space.shape[0]
    self.action_size = env.action_space.shape[0]
    self.act_limit = self.env.action_space.high[0]

    # Actor (Policy) Model
    self.actor_local = Actor(env)
    self.actor_target = Actor(env)

    # Critic (Value) Model
    self.critic_local = Critic(env)
    self.critic_target = Critic(env)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.exploration_mu = 0
    self.exploration_theta = 0.15
    self.exploration_sigma = 0.2
    self.noise = OUNoise(
        self.action_size,
        self.exploration_mu,
        self.exploration_theta,
        self.exploration_sigma,
    )

    # Replay memory
    self.buffer_size = 1000000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.001   # for soft update of target parameters

    # model.summary() prints the architecture itself, so no outer print() is needed
    self.actor_local.model.summary()
    self.critic_local.model.summary()
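# The constructors only wire the networks together; during interaction the agent
# typically queries the local actor and perturbs its output with the noise
# process. A minimal, assumed sketch of such an act() step for the gym-style
# agent above (attribute names follow that constructor; the method itself is not
# part of the original code):
import numpy as np

def act(self, state):
    """Return the actor's action for `state`, with exploration noise, clipped to the action limits."""
    state = np.reshape(state, [-1, self.state_size])
    action = self.actor_local.model.predict(state)[0]
    noisy_action = action + self.noise.sample()
    return np.clip(noisy_action, -self.act_limit, self.act_limit)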