Ejemplo n.º 1
0
    def __init__(
            self,
            task,
            exploration_mu=0,
            exploration_theta=0.15,
            exploration_sigma=0.2,
            buffer_size=100000,
            batch_size=64,
            gamma=0.99,
            tau=0.01,
            actor_learning_rate=0.001,
            critic_learning_rate=0.001,
    ):
        """Set up networks, exploration noise and replay memory for a task.

        Args:
            task: Environment object; must expose ``state_size``,
                ``action_size``, ``action_low`` and ``action_high``.
            exploration_mu: Passed to ``OUNoise`` as its second argument.
            exploration_theta: Passed to ``OUNoise`` as its third argument.
            exploration_sigma: Passed to ``OUNoise`` as its fourth argument.
            buffer_size: First argument given to ``ReplayBuffer``.
            batch_size: Second argument given to ``ReplayBuffer``.
            gamma: Stored as ``self.gamma``; not consumed in this constructor
                (presumably the discount factor -- confirm in the learn step).
            tau: Stored as ``self.tau``; not consumed in this constructor
                (presumably the soft-update rate -- confirm in the learn step).
            actor_learning_rate: ``learning_rate`` keyword for both actors.
            critic_learning_rate: ``learning_rate`` keyword for both critics.
        """
        # Task (environment) description copied onto the agent.
        self.task = task
        self.state_size, self.action_size = task.state_size, task.action_size
        self.action_low, self.action_high = task.action_low, task.action_high

        # Actor (policy) networks: a local one plus a target copy.
        actor_args = (self.state_size, self.action_size,
                      self.action_low, self.action_high)
        self.actor_local = Actor(*actor_args,
                                 learning_rate=actor_learning_rate)
        self.actor_target = Actor(*actor_args,
                                  learning_rate=actor_learning_rate)

        # Critic (value) networks: a local one plus a target copy.
        critic_args = (self.state_size, self.action_size)
        self.critic_local = Critic(*critic_args,
                                   learning_rate=critic_learning_rate)
        self.critic_target = Critic(*critic_args,
                                    learning_rate=critic_learning_rate)

        # Start each target network from its local network's weights
        # (critic first, then actor).
        network_pairs = (
            (self.critic_target, self.critic_local),
            (self.actor_target, self.actor_local),
        )
        for target_net, local_net in network_pairs:
            target_net.model.set_weights(local_net.model.get_weights())

        # Exploration noise process.
        self.exploration_mu = exploration_mu
        self.exploration_theta = exploration_theta
        self.exploration_sigma = exploration_sigma
        self.noise = OUNoise(
            self.action_size,
            self.exploration_mu,
            self.exploration_theta,
            self.exploration_sigma,
        )

        # Replay memory.
        self.buffer_size, self.batch_size = buffer_size, batch_size
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters.
        self.gamma, self.tau = gamma, tau
 def __init__(self,
              task,
              exploration_mu=0,
              exploration_theta=0.15,
              exploration_sigma=0.005,
              buffer_size=100000,
              batch_size=64,
              gamma=0.99,
              tau=0.15):
     """Create the agent's networks, noise process and replay memory.

     Every hyperparameter is an optional argument whose default equals the
     value that used to be hard-coded, so ``Agent(task)``-style calls
     behave exactly as before.

     Args:
         task: Environment object; must expose ``state_size``,
             ``action_size``, ``action_low`` and ``action_high``.
         exploration_mu: Passed to ``OUNoise`` as its second argument.
         exploration_theta: Passed to ``OUNoise`` as its third argument.
         exploration_sigma: Passed to ``OUNoise`` as its fourth argument.
         buffer_size: First argument given to ``ReplayBuffer``.
         batch_size: Second argument given to ``ReplayBuffer``.
         gamma: Discount factor, stored as ``self.gamma``.
         tau: Soft-update rate for the target parameters, stored as
             ``self.tau``.
     """
     self.task = task
     self.state_size = task.state_size
     self.action_size = task.action_size
     self.action_low = task.action_low
     self.action_high = task.action_high

     # Actor (Policy) Model
     self.actor_local = Actor(self.state_size,
                              self.action_size,
                              self.action_low,
                              self.action_high)
     self.actor_target = Actor(self.state_size,
                               self.action_size,
                               self.action_low,
                               self.action_high)

     # Critic (Value) Model
     self.critic_local = Critic(self.state_size,
                                self.action_size)
     self.critic_target = Critic(self.state_size,
                                 self.action_size)

     # Initialize target model parameters with local model parameters
     self.critic_target.model.set_weights(self.critic_local.model.get_weights())
     self.actor_target.model.set_weights(self.actor_local.model.get_weights())

     # Noise process
     self.exploration_mu = exploration_mu
     self.exploration_theta = exploration_theta
     self.exploration_sigma = exploration_sigma
     self.noise = OUNoise(self.action_size,
                          self.exploration_mu,
                          self.exploration_theta,
                          self.exploration_sigma)

     # Replay memory
     self.buffer_size = buffer_size
     self.batch_size = batch_size
     self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

     # Algorithm parameters
     # Discount factor
     self.gamma = gamma

     # For soft update of target parameters
     self.tau = tau

     # Score bookkeeping; both values are only initialised here.
     self.best_score = -np.inf
     self.score = 0
Ejemplo n.º 3
0
    def __init__(self,
                 task,
                 exploration_mu=0,
                 exploration_theta=0.15,
                 exploration_sigma=0.2,
                 buffer_size=100000,
                 batch_size=64,
                 gamma=0.99,
                 tau=0.01):
        """Create the agent's networks, noise process and replay memory.

        Every hyperparameter is an optional argument whose default equals
        the value that used to be hard-coded, so calls that pass only
        ``task`` behave exactly as before.

        Args:
            task: Environment object; must expose ``state_size``,
                ``action_size``, ``action_low`` and ``action_high``.
            exploration_mu: Passed to ``OUNoise`` as its second argument.
            exploration_theta: Passed to ``OUNoise`` as its third argument.
            exploration_sigma: Passed to ``OUNoise`` as its fourth argument.
            buffer_size: First argument given to ``ReplayBuffer``.
            batch_size: Second argument given to ``ReplayBuffer``.
            gamma: Discount factor, stored as ``self.gamma``.
            tau: Soft-update rate for the target parameters, stored as
                ``self.tau``.
        """
        # Task (environment) information
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high
        self.action_range = self.action_high - self.action_low

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = exploration_mu
        self.exploration_theta = exploration_theta
        self.exploration_sigma = exploration_sigma
        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = buffer_size
        self.batch_size = batch_size
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = gamma  # discount factor
        self.tau = tau  # for soft update of target parameters

        # Bookkeeping attributes; they are only initialised here and are
        # consumed by code outside this constructor.
        self.best_w = None
        self.best_score = -np.inf
        self.noise_scale = 0.1
        self.score = 0
        self.total_reward = None
        self.count = 0
Ejemplo n.º 4
0
    def __init__(self,
                 task,
                 exploration_mu=0,
                 exploration_theta=0.15,
                 exploration_sigma=0.2,
                 buffer_size=100000,
                 batch_size=64,
                 gamma=0.99,
                 tau=0.001):
        """Create the agent's networks, noise process and replay memory.

        Every hyperparameter is an optional argument whose default equals
        the value that used to be hard-coded, so calls that pass only
        ``task`` behave exactly as before.

        Args:
            task: Environment object; must expose ``state_size``,
                ``action_size``, ``action_low`` and ``action_high``.
            exploration_mu: Passed to ``OUNoise`` as its second argument.
            exploration_theta: Passed to ``OUNoise`` as its third argument.
            exploration_sigma: Passed to ``OUNoise`` as its fourth argument.
            buffer_size: First argument given to ``ReplayBuffer``.
            batch_size: Second argument given to ``ReplayBuffer``.
            gamma: Discount factor, stored as ``self.gamma``.
            tau: Soft-update rate for the target parameters, stored as
                ``self.tau``.
        """
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(self.actor_local.model.get_weights())

        # For the reasoning behind the default hyperparameter values, see
        # the DDPG paper "Continuous control with deep reinforcement
        # learning", section 7 (EXPERIMENT DETAILS).

        # Noise process
        self.exploration_mu = exploration_mu
        self.exploration_theta = exploration_theta
        self.exploration_sigma = exploration_sigma
        self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = buffer_size
        self.batch_size = batch_size
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = gamma  # discount factor
        self.tau = tau  # for soft update of target parameters

        # Tracking: both scores start at -inf, so any finite score compares
        # greater than the initial values.
        self.score = -np.inf
        self.best_score = -np.inf