Example #1
    def __init__(self, obs_space, act_space, sess, n_agents, name):
        self.act_space = act_space
        self.n_agents = n_agents

        # sup_len, rb_capacity, and mrb_capacity are module-level settings defined elsewhere
        self.ped_dqn = DQN(sess, obs_space, sup_len, act_space, n_agents, name)

        self.action_rb = ReplayBuffer(capacity=rb_capacity)
        self.mission_rb = ReplayBuffer(capacity=mrb_capacity)

        self.train_cnt = 0
        self.mission_train_cnt = 0
        self.sns_q = None
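Example #1's two buffers are created with a capacity keyword, but the ReplayBuffer class itself never appears in these listings. Below is a minimal sketch of a buffer compatible with that call, assuming uniform random sampling; only the class name and the capacity argument come from the example, the rest is an assumption.

import random
from collections import deque, namedtuple

Experience = namedtuple("Experience",
                        ["state", "action", "reward", "next_state", "done"])

class ReplayBuffer:
    """Fixed-size store of experience tuples with uniform random sampling."""

    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)  # oldest entries are evicted first

    def add(self, state, action, reward, next_state, done):
        self.buffer.append(Experience(state, action, reward, next_state, done))

    def sample(self, batch_size):
        # Uniform minibatch; assumes len(self.buffer) >= batch_size
        return random.sample(self.buffer, batch_size)

    def __len__(self):
        return len(self.buffer)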
Example #2
    def __init__(self, task, seed=None, render=False):
        self.env = task.env
        self.total_reward = 0
        self.steps = 0
        self.action_repeat = 3
        self.render = render
        
        # Score tracker and learning parameters
        self.score = -np.inf
        self.best_w = None
        self.best_score = -np.inf
        self.noise_scale = 0.1
        
        # Counter
        self.count = 0

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)
        
        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15
        self.exploration_sigma = 0.2
        self.noise = OUNoise(1, self.exploration_mu, self.exploration_theta, self.exploration_sigma)
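Every example here constructs an OUNoise(size, mu, theta, sigma) process without showing the class. Below is a minimal sketch of the discretized Ornstein-Uhlenbeck process that signature implies; the update x += theta*(mu - x) + sigma*N(0, 1) is the textbook form, and the class body is an assumption.

import numpy as np

class OUNoise:
    """Ornstein-Uhlenbeck process: temporally correlated exploration noise."""

    def __init__(self, size, mu, theta, sigma):
        self.mu = mu * np.ones(size)  # long-run mean of the process
        self.theta = theta            # pull strength back toward mu
        self.sigma = sigma            # scale of the random perturbation
        self.reset()

    def reset(self):
        """Reset the internal state to the mean (typically at episode start)."""
        self.state = np.copy(self.mu)

    def sample(self):
        """Advance the process one step and return the new noise value."""
        dx = self.theta * (self.mu - self.state) \
            + self.sigma * np.random.randn(len(self.state))
        self.state = self.state + dx
        return self.state

With theta = 0.15 and sigma = 0.2, the values most of these agents use, successive samples stay correlated while drifting back toward mu, which suits exploration in continuous control.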
Example #3
    def __init__(self, task):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high
        
        self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        self.critic_target.model.set_weights(self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(self.actor_local.model.get_weights())
        
        self.exploration_mu = 0
        self.exploration_theta = 0.10  # pull toward the mean (persistence of direction)
        self.exploration_sigma = 0.001  # scale of the random perturbation
        self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        self.gamma = 0.90  # discount factor
        self.tau = 0.1  # for soft update of target parameters

        self.best_score = -np.inf
        self.score = 0
Example #4
 def __init__(self, task):
     self.task = task
     self.state_size = task.state_size
     self.action_size = task.action_size
     self.action_low = task.action_low
     self.action_high = task.action_high
     self.actor_local = Actor(self.state_size, self.action_size,
                              self.action_low, self.action_high)
     self.actor_target = Actor(self.state_size, self.action_size,
                               self.action_low, self.action_high)
     self.critic_local = Critic(self.state_size, self.action_size)
     self.critic_target = Critic(self.state_size, self.action_size)
     self.critic_target.model.set_weights(
         self.critic_local.model.get_weights())
     self.actor_target.model.set_weights(
         self.actor_local.model.get_weights())
     self.exploration_mu = 0
     self.exploration_theta = 0.15
     self.exploration_sigma = 0.2
     self.noise = OUNoise(self.action_size, self.exploration_mu,
                          self.exploration_theta, self.exploration_sigma)
     self.buffer_size = 100000
     self.batch_size = 64
     self.memory = ReplayBuffer(self.buffer_size, self.batch_size)
     self.gamma = 0.99
     self.tau = 0.001
Example #5
    def __init__(self, task):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15
        self.exploration_sigma = 0.01
        self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor 0.99
        self.tau = 0.1  # for soft update of target parameters 0.01
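The tau stored by these constructors is the coefficient for the "soft update of target parameters" the comments mention; the update step itself is never listed. Below is a sketch of how it would apply to the Keras models above; soft_update is a hypothetical helper, while get_weights/set_weights are the same Keras calls the examples already use.

def soft_update(local_model, target_model, tau):
    """Polyak averaging: w_target <- tau * w_local + (1 - tau) * w_target."""
    local_weights = local_model.get_weights()
    target_weights = target_model.get_weights()
    new_weights = [tau * lw + (1.0 - tau) * tw
                   for lw, tw in zip(local_weights, target_weights)]
    target_model.set_weights(new_weights)

# Typically called after each learning step, e.g.:
# soft_update(agent.critic_local.model, agent.critic_target.model, agent.tau)
# soft_update(agent.actor_local.model, agent.actor_target.model, agent.tau)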
Example #6
    def __init__(self,
                 task,
                 buffer_size=100000,
                 batch_size=64,
                 gamma=0.99,
                 tau=0.01):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size,
                                 self.action_size,
                                 self.action_low,
                                 self.action_high,
                                 learning_rate=1e-3)
        self.actor_target = Actor(self.state_size,
                                  self.action_size,
                                  self.action_low,
                                  self.action_high,
                                  learning_rate=1e-3)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size,
                                   self.action_size,
                                   learning_rate=1e-4)
        self.critic_target = Critic(self.state_size,
                                    self.action_size,
                                    learning_rate=1e-4)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        self.noise = OUNoise(size=self.action_size)

        # Replay memory
        self.buffer_size = buffer_size
        self.batch_size = batch_size  # default 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = gamma  # discount factor (default 0.99)
        self.tau = tau  # soft-update rate for target parameters (default 0.01)

        # Initialization
        self.last_state = None
        self.total_reward = 0.0

        # Score tracker and learning parameters
        self.score = 0
        self.best_score = -np.inf
        self.count = 0
Example #7
 @classmethod
 def load_model(cls, filename):
     # Pickle files must be opened in binary mode
     with open(filename + '.ddpg_agent', 'rb') as f:
         m = pickle.load(f)
     # The four Keras networks are stored separately and reloaded here
     m.actor_local = load_model(filename + '.actor_local')
     m.actor_target = load_model(filename + '.actor_target')
     m.critic_local = load_model(filename + '.critic_local')
     m.critic_target = load_model(filename + '.critic_target')
     m.replay_buffer = ReplayBuffer(m.buffer_size, m.batch_size)
     return m
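Example #7 restores the pickled agent and reloads its four Keras networks from separate files; the matching saver is not listed. Below is a plausible counterpart, assuming the un-picklable members are detached before pickling; every name beyond those in the example is an assumption.

 def save_model(self, filename):
     # Keras models are not reliably picklable: save them to their own files,
     # detach them, pickle the rest of the agent, then reattach.
     self.actor_local.save(filename + '.actor_local')
     self.actor_target.save(filename + '.actor_target')
     self.critic_local.save(filename + '.critic_local')
     self.critic_target.save(filename + '.critic_target')
     detached = (self.actor_local, self.actor_target,
                 self.critic_local, self.critic_target, self.replay_buffer)
     self.actor_local = self.actor_target = None
     self.critic_local = self.critic_target = self.replay_buffer = None
     with open(filename + '.ddpg_agent', 'wb') as f:
         pickle.dump(self, f)
     (self.actor_local, self.actor_target,
      self.critic_local, self.critic_target, self.replay_buffer) = detached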
Example #8
    def __init__(self, obs_space, act_space, sess, n_agents, name):
        self.obs_space = obs_space
        self.act_space = act_space
        self.n_agents = n_agents

        self.dqn = DQN(sess, obs_space, sup_len, act_space, n_agents, name)

        self.rb = ReplayBuffer(capacity=rb_capacity)
        self.train_cnt = 0
Example #9
    def __init__(self, task, sess, stats):
        self.sess = sess
        self.task = task
        self.stats = stats

        tau = 0.01
        learning_rate = 2e-4

        self.critic_local = QNetwork(sess,
                                     task,
                                     stats,
                                     name='critic_local',
                                     hidden_units=64,
                                     dropout_rate=0.2)
        self.critic_target = QNetwork(sess,
                                      task,
                                      stats,
                                      name='critic_target',
                                      hidden_units=64,
                                      dropout_rate=0.2)
        self.actor_local = Policy(sess,
                                  task,
                                  stats,
                                  name='actor_local',
                                  hidden_units=32,
                                  dropout_rate=0.2)
        self.actor_target = Policy(sess,
                                   task,
                                   stats,
                                   name='actor_target',
                                   hidden_units=32,
                                   dropout_rate=0.2)
        soft_copy_critic_ops = self._create_soft_copy_op('critic_local',
                                                         'critic_target',
                                                         tau=tau)
        soft_copy_actor_ops = self._create_soft_copy_op('actor_local',
                                                        'actor_target',
                                                        tau=tau)
        self._soft_copy_ops = []
        self._soft_copy_ops.extend(soft_copy_critic_ops)
        self._soft_copy_ops.extend(soft_copy_actor_ops)

        self.gamma = 0.99  # reward discount rate

        # Exploration noise process
        exploration_mu = 0
        exploration_theta = 0.15
        exploration_sigma = 0.15
        self.noise = OUNoise(task.action_size, exploration_mu,
                             exploration_theta, exploration_sigma)

        # Replay memory
        self.batch_size = 256
        self.memory = ReplayBuffer(buffer_size=10000, decay_steps=1000)

        self.sess.run(tf.global_variables_initializer())
Example #10
    def __init__(self, task, params=None):
        params = params or {}  # avoid the mutable-default-argument pitfall
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size,
                                 self.action_size,
                                 self.action_low,
                                 self.action_high,
                                 params=params)
        self.actor_target = Actor(self.state_size,
                                  self.action_size,
                                  self.action_low,
                                  self.action_high,
                                  params=params)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size, params)
        self.critic_target = Critic(self.state_size, self.action_size, params)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15  # pull toward the mean (persistence of direction)
        self.exploration_sigma = 0.001  # scale of the random perturbation
        if params.get("sigma"):
            self.exploration_sigma = params.get("sigma")

        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 64
        if params.get("batch_size"):
            self.batch_size = params.get("batch_size")
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.1  # for soft update of target parameters
        #self.gamma = 0.9
        #self.tau = 0.05

        # Statistics
        self.best_score = -np.inf
        self.score = 0
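Example #10 lets callers override sigma and batch_size through the params dict while everything else keeps the defaults above. A hypothetical construction (the DDPG class name is assumed from context):

agent = DDPG(task, params={"sigma": 0.01, "batch_size": 128})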
Example #11
    def __init__(self, task):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # AE: Although OUNoise gives me a convenient set of randomness for each of the rotors, I still need
        # AE: to make a decision myself on how to apply the randomness and how to manage its magnitude
        # AE: (i.e. my explore vs. exploit strategy). These variables will do that.
        self.explore_start = 1.0  # AE: exploration probability at start
        self.explore_stop = 0.001  # AE: minimum exploration probability
        self.decay_rate = 0.003  # AE: exponential decay rate for exploration prob
        self.magnitude_coeff = 0.1  # AE: a coefficient to limit randomness

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0  # AE: additive to the noise. mu * theta will be directly added

        self.exploration_theta = 0.15  # AE: old noise will be multiplied by this
        self.exploration_sigma = 0.2  # AE: new noise will be multiplied by this
        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        # AE: The learning rate. How much we trust the new values compared to the old ones.
        self.tau = 0.0001  # for soft update of target parameters

        # AE: current reward in learning procedure (for statistics)
        self.score = -np.inf

        # Episode variables
        self.reset_episode()
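The AE comments in Example #11 describe scaling the OU noise by an exponentially decaying exploration probability, but the method applying it is not listed. Below is a sketch of how the four variables would combine; the method name and the exact combination are assumptions based on the comments.

import numpy as np

def scaled_noise(self, step):
    """Exploration magnitude decays from explore_start toward explore_stop."""
    explore_p = self.explore_stop + \
        (self.explore_start - self.explore_stop) * np.exp(-self.decay_rate * step)
    # magnitude_coeff keeps the raw OU sample small relative to the action range
    return self.magnitude_coeff * explore_p * self.noise.sample()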
Example #12
    def __init__(self,
                 env,
                 actor_model,
                 critic_model,
                 gamma=0.99,
                 tau=1e-3,
                 critic_lr=1e-3,
                 actor_lr=1e-4,
                 critic_decay=0.):
        # Changed this to use generic env instead of Task
        super().__init__(env)

        self.state_size = env.observation_space.shape[0]
        self.action_size = env.action_space.shape[0]
        self.action_low = env.action_space.low
        self.action_high = env.action_space.high

        # Algorithm parameters
        self.gamma = gamma  # discount factor
        self.tau = tau  # for soft update of target parameters
        self.critic_lr = critic_lr
        self.actor_lr = actor_lr

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high,
                                 self.actor_lr)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high,
                                  self.actor_lr)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size,
                                   self.critic_lr)
        self.critic_target = Critic(self.state_size, self.action_size,
                                    self.critic_lr)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15
        self.exploration_sigma = 0.2
        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)
Example #13
    def setup_replay_buffer_(self):
        """
        Setup a replay buffer.
        :return:        None.
        """

        if self.prioritized_replay:
            self.replay_buffer = PrioritizedReplayBuffer(self.buffer_size, alpha=self.pr_alpha)
            self.beta_schedule = LinearSchedule(self.max_timesteps, initial_p=self.pr_beta, final_p=self.final_explore)
        else:
            self.replay_buffer = ReplayBuffer(self.buffer_size)
            self.beta_schedule = None
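Example #13 anneals the importance-sampling exponent with a LinearSchedule, which is not listed. Below is a minimal sketch consistent with the LinearSchedule(steps, initial_p=..., final_p=...) call, modeled on the OpenAI Baselines utility these names suggest; the body is an assumption.

class LinearSchedule:
    """Linearly interpolate from initial_p to final_p over schedule_timesteps."""

    def __init__(self, schedule_timesteps, initial_p=1.0, final_p=0.0):
        self.schedule_timesteps = schedule_timesteps
        self.initial_p = initial_p
        self.final_p = final_p

    def value(self, t):
        # Clamp at final_p once t reaches schedule_timesteps
        fraction = min(float(t) / self.schedule_timesteps, 1.0)
        return self.initial_p + fraction * (self.final_p - self.initial_p)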
Example #14
    def __init__(self, task, seed=None, render=False):

        self.env = task.env
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        self.total_reward = 0
        self.steps = 0
        self.action_repeat = 3
        self.render = render

        # Score tracker and learning parameters
        self.score = -np.inf
        self.best_w = None
        self.best_score = -np.inf
        self.noise_scale = 0.1

        # Counter
        self.count = 0

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15
        self.exploration_sigma = 0.2
        self.noise = OUNoise(1, self.exploration_mu, self.exploration_theta,
                             self.exploration_sigma)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.01  # for soft update of target parameters
Example #15
    def __init__(self, task):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15
        self.exploration_sigma = 0.2
        self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.01  # for soft update of target parameters

        # From policy search
        
        self.action_range = self.action_high - self.action_low
        
        self.w = np.random.normal(
            size=(self.state_size, self.action_size),  # weights for simple linear policy: state_space x action_space
            scale=(self.action_range / (2 * self.state_size))) # start producing actions in a decent range
        
        # Score tracker and learning parameters
        self.score = -np.inf
        self.best_w = None
        self.best_score = -np.inf
        self.noise_scale = 0.1
        
        # Counter
        self.count = 0
Example #16
    def __init__(self, task, verbose=False):
        self.verbose = verbose

        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        #log_path = '/tmp/logs'
        #self.callback = callbacks.TensorBoard(log_dir=log_path, histogram_freq=1,
        #                        write_images=False, write_grads=True, write_graph=False)
        #self.callback.set_model(self.critic_local.model)

        #log_path = '/tmp/logs'
        #self.writer = tf.summary.FileWriter(log_path)

        #self.learn_counter = 0

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0.1
        self.exploration_theta = 0.2
        self.exploration_sigma = 0.2
        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 512
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.015  # for soft update of target parameters
Example #17
    def __init__(self,
                 actor_model,
                 tgt_actor_model,
                 critic_model,
                 tgt_critic_model,
                 action_limits,
                 actor_lr=1e-4,
                 critic_lr=1e-3,
                 critic_decay=1e-2,
                 tau=1e-3,
                 gamma=0.99,
                 process=None,
                 rb_size=1e6,
                 minibatch_size=64,
                 warmup_episodes=0,
                 episodes_trained=0,
                 train_scores=None,
                 test_scores=None,
                 best_train_score=-np.inf):
        # Changed this to use generic env instead of Task
        super().__init__(warmup_episodes, episodes_trained, train_scores,
                         test_scores, best_train_score)
        self.actor = Actor(actor_model, critic_model, lr=actor_lr)
        self.tgt_actor = Actor(tgt_actor_model, tgt_critic_model, lr=actor_lr)
        self.tgt_actor.set_weights(self.actor.get_weights())

        self.critic = Critic(critic_model, lr=critic_lr, decay=critic_decay)
        self.tgt_critic = Critic(tgt_critic_model,
                                 lr=critic_lr,
                                 decay=critic_decay)
        self.tgt_critic.set_weights(self.critic.get_weights())

        self.action_limits = action_limits
        self.process = process
        self.minibatch_size = minibatch_size
        self.buffer = ReplayBuffer(int(rb_size), self.minibatch_size)
        self.tau = tau
        self.gamma = gamma

        self.state_space = K.int_shape(critic_model.inputs[0])[1]
        self.action_space = K.int_shape(critic_model.inputs[1])[1]

        self.learning_phase = 1
        if process is None:
            self.process = OUNoise(size=self.action_space,
                                   theta=0.15,
                                   mu=0,
                                   sigma=0.2)
        else:
            self.process = process
Example #18
    def __init__(self, task):
        """Initialize DDPG Agent instance."""
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_high = task.action_high
        self.action_low = task.action_low

        # Initializing local and target Actor Models
        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_high, self.action_low)

        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_high, self.action_low)

        # Initializing local and target Critic Models
        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())

        self.exploration_mu = 0
        self.exploration_theta = 0.15
        self.exploration_sigma = 0.2
        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        # Replay Memory
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.01  # for soft update of target parameters

        # Additional Parameters
        self.best_score = -np.inf
        self.total_reward = 0.0
        self.count = 0
        self.score = 0
Example #19
    def __init__(self, env_reset, state_size, action_size, action_low, action_high):
        """Params:
        env_reset: callback function to reset environment at end of episode
        state_size: dimension of state space
        action_size: dimension of action space
        action_low: float - minimum action value
        action_high: float - maximum action value
        """
        self.training_steps = 0 # number of training steps run so far

        self.env_reset = env_reset
        self.state_size = state_size
        self.action_size = action_size
        self.action_low = action_low
        self.action_high = action_high

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 1e-3  # for soft update of target parameters
        self.critic_decay = 1e-2 # L2 weight decay for critic (regularization)
        self.critic_lr = 1e-3 # Learning rate for critic
        self.critic_alpha = 1e-2 # Leaky ReLU alpha for critic
        self.actor_lr = 1e-4 # Learning rate for actor
        self.actor_alpha = 1e-2 # Leaky ReLU alpha for actor

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high, self.actor_lr, self.actor_alpha)
        self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high, self.actor_lr, self.actor_alpha)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size, self.critic_lr, self.critic_decay, self.critic_alpha)
        self.critic_target = Critic(self.state_size, self.action_size, self.critic_lr, self.critic_decay,self.critic_alpha)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15
        self.exploration_sigma = 0.2
        self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = int(1e6)
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)
Example #20
    def __init__(self, task):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15
        # Y.W. changing sigma (0.2 also tried)
        self.exploration_sigma = 0.3
        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        # Replay memory
        # Y.W. extending buffer_size (from 100000)
        self.buffer_size = 1000000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        # Y.W. reducing tau (from 0.01)
        self.tau = 0.001  # for soft update of target parameters

        # Simple reward cache
        self.total_reward = 0.0
        self.best_total_reward = -np.inf
Example #21
    def __init__(self, task):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15
        self.exploration_sigma = 0.2
        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.01  # for soft update of target parameters

        # ======================== Custom amendments: score tracker and learning parameters ========================
        self.best_score = -np.inf
        self.score = 0
        self.total_reward = 0.0
        self.count = 0
        self.best_position = np.zeros(3)
Example #22
    def __init__(self, action_size):
        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15  # pull toward the mean (persistence of direction)
        self.exploration_sigma = 0.001  # scale of the random perturbation

        self.noise = OUNoise(action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)
        #self.memory = Memory(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.1  # for soft update of target parameters
Example #23
    def set_params(self,
                   mu=0.1,
                   sigma=0.1,
                   theta=0.1,
                   buffer_size=1e+8,
                   batch_size=128,
                   gamma=0.99,
                   tau=1e-3):
        self.exploration_mu = mu
        self.exploration_sigma = sigma
        self.exploration_theta = theta
        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        self.buffer_size = int(buffer_size)
        self.batch_size = int(batch_size)
        self.buffer = ReplayBuffer(self.buffer_size)

        self.gamma = gamma
        self.tau = tau
Example #24
    def __init__(self, task):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        self.exploration_mu = 0
        self.exploration_theta = 0.15
        self.exploration_sigma = 0.001

        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size)

        self.gamma = 0.99
        self.tau = 0.1
        self.learning_rate = 0.0005

        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        self.actor_local = Actor(self.state_size,
                                 self.action_size,
                                 self.action_low,
                                 self.action_high,
                                 learning_rate=self.learning_rate)
        self.actor_target = Actor(self.state_size,
                                  self.action_size,
                                  self.action_low,
                                  self.action_high,
                                  learning_rate=self.learning_rate)

        self.critic_local = Critic(self.state_size,
                                   self.action_size,
                                   learning_rate=self.learning_rate)
        self.critic_target = Critic(self.state_size,
                                    self.action_size,
                                    learning_rate=self.learning_rate)
Example #25
 def __init__(self, task):
     self.task = task
     self.state_size = task.state_size
     self.action_size = task.action_size
     self.action_low = task.action_low
     self.action_high = task.action_high
     
     # Actor model
     self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
     self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

     # Critic model
     self.critic_local = Critic(self.state_size, self.action_size)
     self.critic_target = Critic(self.state_size, self.action_size)

     # Initialize target model params with local params
     self.critic_target.model.set_weights(
             self.critic_local.model.get_weights())
     self.actor_target.model.set_weights(
             self.actor_local.model.get_weights())

     # Initialize noise process
     self.exploration_mu = 0
     self.exploration_theta = 0.15
     self.exploration_sigma = 0.2
     self.noise = OUNoise(self.action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)

     # Replay memory initialization
     self.buffer_size, self.batch_size = 2000000, 64
     self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

     # Initialize algorithm parameters
     self.gamma, self.tau = 0.95, 0.001

     # Initialize scores
     self.score, self.best_score = -np.inf, -np.inf
Example #26
    def __init__(self,
                 task,
                 mu=0.02,
                 theta=0.16,
                 sigma=0.21,
                 buffer=500000,
                 batch=64,
                 gamma=0.98,
                 tau=0.02,
                 learning=0.001,
                 dropout=0.2):

        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high, learning,
                                 dropout)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high, learning,
                                  dropout)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size, learning,
                                   dropout)
        self.critic_target = Critic(self.state_size, self.action_size,
                                    learning, dropout)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = mu
        self.exploration_theta = theta
        self.exploration_sigma = sigma
        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        # Replay memory
        self.buffer_size = buffer
        self.batch_size = batch
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = gamma  # discount factor
        self.tau = tau  # for soft update of target parameters
        # Score tracker and learning parameters
        #        self.best_w = None
        self.score = 0
        self.best_score = -np.inf
        self.noise_scale = 0.1

        # Episode variables
        self.reset_episode()
Example #27
    def __init__(self,
                 state_size,
                 action_size,
                 num_agents,
                 random_seed,
                 lr_actor=1e-4,
                 lr_critic=1e-3,
                 fc1_units=400,
                 fc2_units=300,
                 buffer_size=int(1e5),
                 batch_size=128,
                 gamma=0.99,
                 tau=1e-3,
                 max_norm=1.0,
                 learn_period=20,
                 learn_sampling_num=10):
        """Initialize an Agent object.

        Args:
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            random_seed (int): random seed
            
            max_norm (float): value of clip_grad_norm for critic optimizer
        """

        super().__init__()

        self.state_size = state_size
        self.num_agents = num_agents
        self.action_size = action_size
        self.seed = random.seed(random_seed)
        self.max_norm = max_norm
        self.learn_period = learn_period
        self.learn_sampling_num = learn_sampling_num

        # Actor Network (w/ Target Network)
        self.actor_local = DDPGActor(state_size,
                                     action_size,
                                     random_seed,
                                     fc1_units=fc1_units,
                                     fc2_units=fc2_units).to(device)
        self.actor_target = DDPGActor(state_size,
                                      action_size,
                                      random_seed,
                                      fc1_units=fc1_units,
                                      fc2_units=fc2_units).to(device)

        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=lr_actor)

        # Critic Network (w/ Target Network)
        self.critic_local = DDPGCritic(state_size,
                                       action_size,
                                       random_seed,
                                       fcs1_units=fc1_units,
                                       fc2_units=fc2_units).to(device)
        self.critic_target = DDPGCritic(state_size,
                                        action_size,
                                        random_seed,
                                        fcs1_units=fc1_units,
                                        fc2_units=fc2_units).to(device)

        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=lr_critic)

        # Noise process for action
        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15  # (Timothy Lillicrap, 2016)
        self.exploration_sigma = 0.2  # (Timothy Lillicrap, 2016)
        #         self.noise = OUNoise(action_size, self.exploration_mu, self.exploration_theta, self.exploration_sigma)
        self.noise = OUNoiseMultivariate((num_agents, action_size),
                                         random_seed,
                                         mu=self.exploration_mu,
                                         theta=self.exploration_theta,
                                         sigma=self.exploration_sigma)

        # Replay memory
        self.memory = ReplayBuffer(action_size, buffer_size, batch_size,
                                   random_seed, device)

        # parameter of discounted reward
        self.gamma = gamma

        # soft update parameter
        self.tau = tau

        self.batch_size = batch_size
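Example #27's docstring says max_norm is the clip_grad_norm value for the critic optimizer, but the learning step is not listed. Below is a sketch of where the clipping would sit in the critic update; critic_update_step is a hypothetical helper, while torch.nn.utils.clip_grad_norm_ is the standard PyTorch call.

import torch

def critic_update_step(self, critic_loss):
    """One optimizer step for the critic with gradient-norm clipping."""
    self.critic_optimizer.zero_grad()
    critic_loss.backward()
    # Rescale gradients so their global norm does not exceed max_norm
    torch.nn.utils.clip_grad_norm_(self.critic_local.parameters(), self.max_norm)
    self.critic_optimizer.step()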
Example #28
    def __init__(self, task):
        '''
        Params
        ======
        task (object)   : environment

        Reference: Continuous Control With Deep Reinforcement Learning (2016);
        Playing CartPole through Asynchronous Advantage Actor Critic (A3C) with tf.keras
        =========
        gamma   : 0.99
        tau     : 0.001
        buffer_size (ReplayBuffer)  : 1e6
        batch_size (ReplayBuffer)   : 64
        theta (Ornstein-Uhlenbeck process)  : 0.15
        sigma (Ornstein-Uhlenbeck process)  : 0.2
        '''

        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # actor (policy) model - use two copies of model for updating model and producing target
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)

        # critic (value) model - use two copies of model for updating model and producing target
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # initialize target model parameters with local model parameters
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())

        # noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15
        self.exploration_sigma = 0.2
        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        # replay memory
        self.buffer_size = 1000000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.001  # for soft update of target parameters

        # reward history
        self.best_avg_score = -np.inf
        self.accumulated_reward = 0
        self.count = 0
Example #29
    def __init__(self, task, prioritized_replay=True):
        self.task = task
        self.state_size = task.state_size
        self.action_size = task.action_size
        self.action_low = task.action_low
        self.action_high = task.action_high

        # Actor (Policy) Model
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        self.exploration_mu = 0
        self.exploration_theta = 0.15  # 0.1 also tried
        self.exploration_sigma = 0.2  # 0.1 also tried
        self.noise = OUNoise(self.action_size, self.exploration_mu,
                             self.exploration_theta, self.exploration_sigma)

        self.buffer_size = 100000
        self.batch_size = 64

        self.prioritized_replay = prioritized_replay
        self.prioritized_replay_alpha = 0.6
        self.prioritized_replay_beta0 = 0.4
        self.prioritized_replay_beta_iters = None
        self.prioritized_replay_eps = 1e-6
        self.max_timesteps = 100000

        # Replay buffer
        if self.prioritized_replay:
            self.memory = PrioritizedReplayBuffer(
                self.buffer_size, alpha=self.prioritized_replay_alpha)
            if self.prioritized_replay_beta_iters is None:
                self.prioritized_replay_beta_iters = self.max_timesteps
            self.beta_schedule = LinearSchedule(
                self.prioritized_replay_beta_iters,
                initial_p=self.prioritized_replay_beta0,
                final_p=1.0)
        else:
            self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.01  # for soft update of target parameters
        # self.tau = 0.001  # value used in the DDPG paper

        self.td_errors_list = []
        self.actor_loss_list = []
        self.critic_loss_list = []
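Example #29 only constructs the prioritized buffer and its beta schedule; the sampling side is not listed. Below is a sketch of how a learning step could consume them, following the Baselines-style API the example's names suggest (sample(batch_size, beta=...) returning importance weights and indices, plus update_priorities); the _compute_td_errors helper is hypothetical.

import numpy as np

def learn_step(self, t):
    """Draw a prioritized minibatch, then feed updated priorities back."""
    beta = self.beta_schedule.value(t)  # anneal bias correction toward 1.0
    (states, actions, rewards, next_states, dones,
     weights, idxes) = self.memory.sample(self.batch_size, beta=beta)
    # Scale the critic loss by `weights`, then recompute per-sample TD errors
    td_errors = self._compute_td_errors(states, actions, rewards,
                                        next_states, dones)  # hypothetical
    new_priorities = np.abs(td_errors) + self.prioritized_replay_eps
    self.memory.update_priorities(idxes, new_priorities)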