Example #1
    def __init__(self,
                 task,
                 exploration_mu=0,
                 exploration_theta=0.15,
                 exploration_sigma=0.2,
                 buffer_size=100000,
                 batch_size=64,
                 gamma=0.99,
                 tau=0.01,
                 actor_learning_rate=0.001,
                 critic_learning_rate=0.001):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size,
                                 self.action_size,
                                 self.action_low,
                                 self.action_high,
                                 learning_rate=actor_learning_rate)
        self.actor_target = Actor(self.state_size,
                                  self.action_size,
                                  self.action_low,
                                  self.action_high,
                                  learning_rate=actor_learning_rate)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size,
                                   self.action_size,
                                   learning_rate=critic_learning_rate)
        self.critic_target = Critic(self.state_size,
                                    self.action_size,
                                    learning_rate=critic_learning_rate)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = exploration_mu
        self.exploration_theta = exploration_theta
        self.exploration_sigma = exploration_sigma
        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = buffer_size
        self.batch_size = batch_size
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = gamma
        self.tau = tau
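
All of these constructors pair a local and a target network and keep a tau parameter; that pair exists for Polyak-averaged ("soft") target updates between learning steps. A minimal sketch of such an update, assuming Keras-style models with get_weights/set_weights; the helper name soft_update is hypothetical and does not appear in the excerpts:

def soft_update(local_model, target_model, tau):
    # Blend each target weight toward the local weight:
    #   w_target <- tau * w_local + (1 - tau) * w_target
    new_weights = [tau * lw + (1.0 - tau) * tw
                   for lw, tw in zip(local_model.get_weights(),
                                     target_model.get_weights())]
    target_model.set_weights(new_weights)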
Example #2

    def __init__(self,
                 actor_model,
                 tgt_actor_model,
                 critic_model,
                 tgt_critic_model,
                 action_limits,
                 actor_lr=1e-4,
                 critic_lr=1e-3,
                 critic_decay=1e-2,
                 tau=1e-3,
                 gamma=0.99,
                 process=None,
                 rb_size=1e6,
                 minibatch_size=64,
                 warmup_episodes=0,
                 episodes_trained=0,
                 train_scores=None,
                 test_scores=None,
                 best_train_score=-np.inf):
        # Changed this to use generic env instead of Task
        super().__init__(warmup_episodes, episodes_trained, train_scores,
                         test_scores, best_train_score)
        self.actor = Actor(actor_model, critic_model, lr=actor_lr)
        self.tgt_actor = Actor(tgt_actor_model, tgt_critic_model, lr=actor_lr)
        self.tgt_actor.set_weights(self.actor.get_weights())

        self.critic = Critic(critic_model, lr=critic_lr, decay=critic_decay)
        self.tgt_critic = Critic(tgt_critic_model,
                                 lr=critic_lr,
                                 decay=critic_decay)
        self.tgt_critic.set_weights(self.critic.get_weights())

        self.action_limits = action_limits
        self.process = process
        self.minibatch_size = minibatch_size
        self.buffer = ReplayBuffer(int(rb_size), self.minibatch_size)
        self.tau = tau
        self.gamma = gamma

        self.state_space = K.int_shape(critic_model.inputs[0])[1]
        self.action_space = K.int_shape(critic_model.inputs[1])[1]

        self.learning_phase = 1
        # Default to an Ornstein-Uhlenbeck process when none is supplied
        # (self.process was already set from the argument above)
        if process is None:
            self.process = OUNoise(size=self.action_space,
                                   theta=0.15,
                                   mu=0,
                                   sigma=0.2)
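
Every example delegates exploration to OUNoise(size, mu, theta, sigma), whose implementation is not shown in these excerpts. A minimal sketch of the usual Ornstein-Uhlenbeck formulation matching that constructor:

import numpy as np

class OUNoise:
    """Ornstein-Uhlenbeck process: temporally correlated exploration noise."""

    def __init__(self, size, mu, theta, sigma):
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        self.reset()

    def reset(self):
        # Reset the internal state to the mean
        self.state = np.copy(self.mu)

    def sample(self):
        # Mean-reverting drift plus Gaussian diffusion
        dx = self.theta * (self.mu - self.state) \
             + self.sigma * np.random.randn(len(self.state))
        self.state = self.state + dx
        return self.state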
Example #3

    def __init__(self, task):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size,
                                 self.action_size,
                                 self.action_low,
                                 self.action_high)
        self.actor_target = Actor(self.state_size,
                                  self.action_size,
                                  self.action_low,
                                  self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size,
                                   self.action_size)
        self.critic_target = Critic(self.state_size,
                                    self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15
        self.exploration_sigma = 0.005
        self.noise = OUNoise(self.action_size,
                             self.exploration_mu,
                             self.exploration_theta,
                             self.exploration_sigma)

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.15  # for soft update of target parameters

        # Score tracking
        self.best_score = -np.inf
        self.score = 0
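
Likewise, ReplayBuffer(buffer_size, batch_size) is constructed in every example but not defined in the excerpts. A common deque-based sketch matching that signature:

import random
from collections import deque, namedtuple

Experience = namedtuple("Experience",
                        ["state", "action", "reward", "next_state", "done"])

class ReplayBuffer:
    """Fixed-size buffer of experience tuples with uniform random sampling."""

    def __init__(self, buffer_size, batch_size):
        self.memory = deque(maxlen=buffer_size)  # oldest experiences fall off
        self.batch_size = batch_size

    def add(self, state, action, reward, next_state, done):
        self.memory.append(Experience(state, action, reward, next_state, done))

    def sample(self, batch_size=None):
        return random.sample(self.memory, batch_size or self.batch_size)

    def __len__(self):
        return len(self.memory)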
Example #4
    def __init__(self, task):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(self.actor_local.model.get_weights())

        # The reference for the values below can be seen in the DDPG paper
        #   "Continuous control with deep reinforcement learning",
        #   Section 7: EXPERIMENT DETAILS.

        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15
        self.exploration_sigma = 0.2
        self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.001  # for soft update of target parameters

        # Tracking
        self.score = -np.inf
        self.best_score = -np.inf
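
Agents built this way usually expose an act method that queries the local actor and perturbs its output with the noise process, clipped to action_low/action_high. The method is not part of these excerpts; a sketch under that assumption:

import numpy as np

def act(self, state):
    # Predict a deterministic action for one state, then add exploration noise
    state = np.reshape(state, [-1, self.state_size])
    action = self.actor_local.model.predict(state)[0]
    return np.clip(action + self.noise.sample(),
                   self.action_low, self.action_high)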
Example #5

    def __init__(self, env):
        self.env = env
        self.state_size = env.observation_space.shape[0]
        self.action_size = env.action_space.shape[0]
        self.act_limit = self.env.action_space.high[0]

        # Actor (Policy) Model
        self.actor_local = Actor(env)
        self.actor_target = Actor(env)

        # Critic (Value) Model
        self.critic_local = Critic(env)
        self.critic_target = Critic(env)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15
        self.exploration_sigma = 0.2
        self.noise = OUNoise(
            self.action_size,
            self.exploration_mu,
            self.exploration_theta,
            self.exploration_sigma,
        )

        # Replay memory
        self.buffer_size = 1000000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.001  # for soft update of target parameters

        # model.summary() prints the summary itself and returns None,
        # so wrapping it in print() would also print "None"
        self.actor_local.model.summary()
        self.critic_local.model.summary()
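
This last variant reads its dimensions from a Gym-style environment (observation_space/action_space) rather than a Task wrapper, so it can be instantiated directly against any continuous-control environment. A hypothetical usage, assuming the surrounding class is named DDPG (the class name is not shown in the excerpt):

import gym

env = gym.make("Pendulum-v1")  # any continuous action space works
agent = DDPG(env)              # builds actor/critic pairs and prints summaries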