class PrioritizedReplayMemory:
    e = 0.01
    a = 0.6

    def __init__(self, capacity):
        self.tree = SumTree(capacity)

    def _getPriority(self, error):
        return (error + self.e) ** self.a

    def add(self, error, sample):
        p = self._getPriority(error)
        self.tree.add(p, sample)

    def sample(self, n):
        batch = []
        segment = self.tree.total() / n

        for i in range(n):
            a = segment * i
            b = segment * (i + 1)

            s = random.uniform(a, b)
            (idx, p, data) = self.tree.get(s)
            batch.append((idx, data))

        return batch

    def update(self, idx, error):
        p = self._getPriority(error)
        self.tree.update(idx, p)

    def isFull(self):
        return self.tree.isFull()
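# ---------------------------------------------------------------------------
# None of the snippets in this collection define the SumTree they depend on.
# For reference, here is a minimal sketch of the conventional array-backed
# implementation. It is an assumption, not the exact tree any one snippet
# shipped with: total/add/get/update plus write, n_entries, max and isFull
# match the most common variants below; other snippets rename or extend this
# API (total_p/get_leaf, total_priority, total_and_count, p_array,
# update_batch, transitions).
# ---------------------------------------------------------------------------
import numpy as np

class SumTree:
    def __init__(self, capacity):
        self.capacity = capacity
        self.tree = np.zeros(2 * capacity - 1)        # internal sums + leaf priorities
        self.data = np.zeros(capacity, dtype=object)  # stored transitions
        self.write = 0                                # next leaf to overwrite
        self.n_entries = 0                            # number of filled leaves

    def _propagate(self, idx, change):
        # push a priority change up to the root so internal sums stay valid
        parent = (idx - 1) // 2
        self.tree[parent] += change
        if parent != 0:
            self._propagate(parent, change)

    def _retrieve(self, idx, s):
        # walk down: go left if s falls in the left subtree's mass,
        # otherwise go right with the left mass subtracted
        left = 2 * idx + 1
        right = left + 1
        if left >= len(self.tree):
            return idx
        if s <= self.tree[left]:
            return self._retrieve(left, s)
        return self._retrieve(right, s - self.tree[left])

    def total(self):
        return self.tree[0]                           # sum of all priorities

    def max(self):
        return self.tree[self.capacity - 1:].max()    # largest leaf priority

    def add(self, p, data):
        idx = self.write + self.capacity - 1
        self.data[self.write] = data
        self.update(idx, p)
        self.write = (self.write + 1) % self.capacity  # ring-buffer overwrite
        self.n_entries = min(self.n_entries + 1, self.capacity)

    def update(self, idx, p):
        change = p - self.tree[idx]
        self.tree[idx] = p
        self._propagate(idx, change)

    def get(self, s):
        # map a value s in [0, total()] to (tree index, priority, transition)
        idx = self._retrieve(0, s)
        return (idx, self.tree[idx], self.data[idx - self.capacity + 1])

    def isFull(self):
        return self.n_entries == self.capacity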
class Memory:   # stored as ( s, a, r, s_ ) in SumTree
    e = 0.01
    a = 0.6

    def __init__(self, capacity):
        self.tree = SumTree(capacity)

    def _getPriority(self, error):
        return (error + self.e) ** self.a

    def add(self, error, sample):
        p = self._getPriority(error)
        self.tree.add(p, sample)

    def sample(self, n):
        batch = []
        segment = self.tree.total() / n

        for i in range(n):
            a = segment * i
            b = segment * (i + 1)

            s = random.uniform(a, b)
            (idx, p, data) = self.tree.get(s)
            batch.append((idx, data))

        return batch

    def update(self, idx, error):
        p = self._getPriority(error)
        self.tree.update(idx, p)
class PrioritizedER:
    e = 0.01
    a = 0.6
    beta = 0.4
    beta_increment_per_sampling = 0.001

    def __init__(self, capacity):
        self.tree = SumTree(capacity)
        self.capacity = capacity

    def _get_priority(self, error):
        return (abs(error) + self.e) ** self.a

    def push(self, error, sample):
        p = self._get_priority(error)
        self.tree.add(p, sample)

    def sample(self, n):
        batch = []
        idxs = []
        segment = self.tree.total() / n
        priorities = []

        self.beta = np.min([1., self.beta + self.beta_increment_per_sampling])

        for i in range(n):
            a = segment * i
            b = segment * (i + 1)

            s = random.uniform(a, b)
            (idx, p, data) = self.tree.get(s)
            if data == 0:
                # Landed on an unwritten leaf; fall back to the previously
                # sampled transition (assumes at least one earlier sample).
                p = priorities[-1]
                data = batch[-1]
                idx = idxs[-1]
                print('WARNING: transition value was 0, '
                      'replaced it with the previous sampled transition')
            priorities.append(p)
            batch.append(data)
            idxs.append(idx)

        # np.array so the element-wise division below is valid
        sampling_probabilities = (np.array(priorities) / self.tree.total()) + 10e-5
        is_weight = np.power(self.tree.n_entries * sampling_probabilities,
                             -self.beta)
        is_weight /= is_weight.max()

        return batch, idxs, is_weight

    def update(self, idx, error):
        p = self._get_priority(error)
        self.tree.update(idx, p)

    def __len__(self):
        return self.tree.n_entries
class Memory: """ Stores transitions as (s, a, r, s_, done) tuples using a SumTree. Each sample is assigned a priority which affects retrieval """ def __init__(self, capacity, e=0.01, a=0.6): """ :param capacity: The maximum number of samples that can be stored :param e: Ensures that no sample has 0 priority :param a: """ self.capacity = capacity self.e = e self.a = a self.tree = SumTree(capacity) def _getPriority(self, error): return (error + self.e)**self.a def add(self, error, sample): """ Adds a new sample to the buffer :param error: The error associated with the sample :param sample: The sample to add """ p = self._getPriority(error) self.tree.add(p, sample) def sample(self, n): """ Returns n samples from the buffer :param n: The number of samples to return """ batch = [] segment = self.tree.total() / n for i in range(n): a = segment * i b = segment * (i + 1) s = random.uniform(a, b) (idx, p, data) = self.tree.get(s) batch.append((idx, data)) return batch def update(self, idx, error): p = self._getPriority(error) self.tree.update(idx, p)
class Memory:   # stored as ( s, a, r, s_ ) in SumTree
    def __init__(self, capacity):
        self.tree = SumTree(capacity)
        self.max_p = 1
        self.e = 0.0
        self.a = 0.6

    def _getPriority(self, error):
        return (error + self.e) ** self.a

    def length(self):
        return self.tree.write

    def add(self, sample, error):
        p = self._getPriority(error)
        self.tree.add(p, sample)

    def add_p(self, p, sample):
        self.tree.add(p, sample)

    def sample(self, n):
        batch = []
        idx_batch = []
        segment = self.tree.total() / n

        for i in range(n):
            a = segment * i
            b = segment * (i + 1)

            s = random.uniform(a, b)
            (idx, p, data) = self.tree.get(s)
            batch.append(data)
            idx_batch.append(idx)

        return batch, idx_batch

    def update(self, idx, error):
        p = self._getPriority(error)
        if p > self.max_p:
            self.max_p = p
        self.tree.update(idx, p)

    def update_batch(self, idx_batch, error_batch):
        p_batch = self._getPriority(error_batch)
        if np.max(p_batch) > self.max_p:
            self.max_p = np.max(p_batch)
        self.tree.update_batch(idx_batch, p_batch)
class Memory(object):   # stored as ( s, a, r, s_ ) in SumTree
    """
    This SumTree code is a modified version; the original code is from:
    https://github.com/jaara/AI-blog/blob/master/Seaquest-DDQN-PER.py
    """
    epsilon = 0.01                       # small amount to avoid zero priority
    alpha = 0.6                          # [0~1] convert the importance of TD error to priority
    beta = 0.4                           # importance-sampling, from initial value increasing to 1
    beta_increment_per_sampling = 0.001
    abs_err_upper = 1.                   # clipped abs error

    def __init__(self, capacity):
        self.tree = SumTree(capacity)

    def store(self, transition):
        max_p = np.max(self.tree.tree[-self.tree.capacity:])
        if max_p == 0:
            max_p = self.abs_err_upper
        self.tree.add(max_p, transition)  # set the max p for new p

    def sample(self, n):
        # b_idx, b_memory, ISWeights = np.empty((n,), dtype=np.int32), np.empty((n, self.tree.data[0].size)), np.empty((n, 1))
        b_idx, b_memory, ISWeights = deque(), deque(), deque()
        pri_seg = self.tree.total_p / n   # priority segment
        self.beta = np.min([1., self.beta + self.beta_increment_per_sampling])  # max = 1

        # for later calculation of the IS weights
        max_prob = np.max(self.tree.tree[-self.tree.capacity:]) / self.tree.total_p

        for i in range(n):
            a, b = pri_seg * i, pri_seg * (i + 1)
            v = np.random.uniform(a, b)
            idx, p, data = self.tree.get_leaf(v)
            prob = p / self.tree.total_p
            # ISWeights[i, 0] = np.power(prob/max_prob, -self.beta)
            ISWeights.append(np.power(prob / max_prob, -self.beta))
            # b_idx[i], b_memory[i, :] = idx, data
            b_idx.append(idx)
            b_memory.append(data)

        return (np.array(list(b_idx)), np.array(list(b_memory)),
                np.reshape(np.array(list(ISWeights)), (n, 1)))

    def batch_update(self, tree_idx, abs_errors):
        abs_errors += self.epsilon  # convert to abs and avoid 0
        clipped_errors = np.minimum(abs_errors, self.abs_err_upper)
        ps = np.power(clipped_errors, self.alpha)
        for ti, p in zip(tree_idx, ps):
            self.tree.update(ti, p)
class Memory:
    # Constants
    e = 0.01
    a = 0.0  # 0.6

    # Initialize memory
    def __init__(self, capacity):
        self.tree = SumTree(capacity)
        self.capacity = capacity
        self.len = 0

    # Calculate error priority
    def getPriority(self, error):
        return (error + self.e) ** self.a

    # Add sample to the memory
    def add(self, error, sample):
        p = self.getPriority(error)
        self.tree.add(p, sample)
        self.len = min(self.len + 1, self.capacity)

    # Generate 'n' random samples from the memory
    def sample(self, n):
        batch = []
        segment = self.tree.total() / n

        for i in range(n):
            a = segment * i
            b = segment * (i + 1)

            s = random.uniform(a, b)
            (idx, p, data) = self.tree.get(s)
            batch.append((idx, data))

        return batch

    # Number of current samples in memory
    def numberSamples(self):
        return self.len

    # Update priority of error
    def update(self, idx, error):
        p = self.getPriority(error)
        self.tree.update(idx, p)
class PrioritizedMemory(Memory):
    def __init__(self, capacity, epsilon=0.01, alpha=0.6, beta=0.4,
                 beta_increment=0.001):
        self.epsilon = epsilon
        self.alpha = alpha
        self.beta = beta
        self.beta_increment = beta_increment
        self.capacity = capacity
        self.tree = SumTree(self.capacity)

    def _compute_priority(self, loss):
        return (np.abs(loss) + self.epsilon) ** self.alpha

    def push(self, *args):
        # New transitions enter at the current max priority so they are
        # guaranteed to be replayed at least once.
        priority = self.tree.max()
        priority = 1 if priority <= 0 else priority
        self.tree.add(priority, Transition(*args))

    def sample(self, batch_size):
        batch = []
        indices = []
        weights = np.empty(batch_size, dtype='float32')
        self.beta += self.beta_increment
        beta = np.minimum(1., self.beta)
        total = self.tree.total()

        for i, r in enumerate(np.random.uniform(0, total, (batch_size,))):
            index, priority, data = self.tree.get(r)
            batch.append(data)
            indices.append(index)
            weights[i] = (self.capacity * priority / total) ** (-beta)

        return batch, indices, weights / weights.max()

    def update(self, index, loss):
        priority = self._compute_priority(loss)
        self.tree.update(index, priority)

    def __len__(self):
        return self.tree.n_entries
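# ---------------------------------------------------------------------------
# Hedged usage sketch for a buffer with this push/sample/update interface,
# assuming the reference SumTree sketched near the top of this collection.
# The Transition namedtuple and all dummy data below are illustrative
# stand-ins, not part of any snippet here.
# ---------------------------------------------------------------------------
import numpy as np
from collections import namedtuple

Transition = namedtuple('Transition', ('state', 'action', 'reward', 'next_state'))

memory = PrioritizedMemory(capacity=1024)

# New transitions enter at the current max priority, so each is replayed
# at least once before its priority is corrected.
for _ in range(64):
    memory.push(np.random.rand(4), np.random.randint(2), 1.0, np.random.rand(4))

# Sample a prioritized minibatch plus tree indices and normalized IS weights.
batch, indices, is_weights = memory.sample(batch_size=32)

# After a learning step, feed the fresh |TD errors| back (random stand-ins
# here) so future sampling reflects them.
td_errors = np.random.rand(32)
for index, error in zip(indices, td_errors):
    memory.update(index, error)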
class Replay_Memory:
    def __init__(self):
        self.memory_len = 10000
        self.memory_bias = .01
        self.memory_pow = .6
        self.tree = SumTree(self.memory_len)

    def add(self, error, sample):
        priority = (error + self.memory_bias) ** self.memory_pow
        self.tree.add(priority, sample)

    def sample(self, batch_size):
        """
        Get a sample batch of the replay memory

        Returns:
            batch: a batch with one sample from each segment of the memory
        """
        batch = []
        # We want one representative of every distribution segment in the
        # batch, e.g. BATCH_SIZE=2: the batch contains one sample from
        # [min, median] and one from [median, max].
        segment = self.tree.total() / batch_size
        for i in range(batch_size):
            minimum = segment * i
            maximum = segment * (i + 1)
            s = random.uniform(minimum, maximum)
            (idx, _, data) = self.tree.get(s)
            batch.append((idx, data))
        return batch

    def update(self, idx, error):
        """
        Updates one entry in the replay memory

        Args:
            idx: the position of the outdated transition in the memory
            error: the newly calculated error
        """
        priority = (error + self.memory_bias) ** self.memory_pow
        self.tree.update(idx, priority)
class PERMemory(ReplayMemory):
    epsilon = 0.0001
    alpha = 0.6

    def __init__(self, CAPACITY):
        super(PERMemory, self).__init__(CAPACITY)
        self.tree = SumTree(CAPACITY)
        self.size = 0

    # Compute the priority via proportional prioritization
    def _getPriority(self, td_error):
        return (td_error + self.epsilon) ** self.alpha

    def push(self, state, action, state_next, reward):
        """Stores state, action, state_next and reward in memory"""
        self.size += 1

        priority = self.tree.max()
        if priority <= 0:
            priority = 1

        self.tree.add(priority, Transition(state, action, state_next, reward))

    def sample(self, batch_size):
        data_list = []
        indexes = []
        for rand in np.random.uniform(0, self.tree.total(), batch_size):
            (idx, _, data) = self.tree.get(rand)
            data_list.append(data)
            indexes.append(idx)

        return data_list, indexes

    def update(self, idx, td_error):
        priority = self._getPriority(td_error)
        self.tree.update(idx, priority)

    def __len__(self):
        return self.size
class PriorityExperienceReplay:
    '''
    Almost a copy from
    https://github.com/jaara/AI-blog/blob/master/Seaquest-DDQN-PER.py
    '''

    def __init__(self, max_size, window_size, input_shape):
        # set default sumtree
        self.tree = SumTree(max_size)
        self._max_size = max_size
        # dimensions for how to store state and next state
        self._window_size = window_size
        self._WIDTH = input_shape[0]
        self._HEIGHT = input_shape[1]
        # hyperparameters for the priority probability
        self.e = 0.01
        self.a = 0.6

    def _getPriority(self, error):
        # set the probability for a given experience
        return (error + self.e) ** self.a

    def append(self, state, action, reward, next_state, done):
        # add each experience to the tree with its computed probability
        for s, a, r, n_s, d in zip(state, action, reward, next_state, done):
            # when first appended, set the maximum priority
            # (0.5 is the maximum error)
            p = self._getPriority(0.5)
            self.tree.add(p, data=(s, a, r, n_s, d))

    def sample(self, batch_size, indexes=None):
        # batches for data, index and priority
        data_batch = []
        idx_batch = []
        p_batch = []
        # split the tree into batch_size segments
        segment = self.tree.total_and_count()[0] / batch_size

        # divide the tree into multiple sections and search each one,
        # to get diverse yet high-priority samples
        for i in range(batch_size):
            a = segment * i
            b = segment * (i + 1)

            s = random.uniform(a, b)
            (idx, p, data) = self.tree.get(s)
            data_batch.append(data)
            idx_batch.append(idx)
            p_batch.append(p)

        zipped = list(zip(*data_batch))
        zipped[0] = np.reshape(
            zipped[0], (-1, self._WIDTH, self._HEIGHT, self._window_size))
        zipped[3] = np.reshape(
            zipped[3], (-1, self._WIDTH, self._HEIGHT, self._window_size))

        sum_p, count = self.tree.total_and_count()
        return zipped, idx_batch, p_batch, sum_p, count

    def update(self, idx_list, error_list):
        # update priorities according to the TD errors from the current
        # network; repeat after every training step
        for idx, error in zip(idx_list, error_list):
            p = self._getPriority(error)
            self.tree.update(idx, p)
class MemoryBuffer(object):
    """ Memory Buffer Helper class for Experience Replay
    using a double-ended queue or a Sum Tree (for PER)
    """

    def __init__(self, buffer_size, with_per=False):
        """ Initialization """
        if with_per:
            # Prioritized Experience Replay
            self.alpha = 0.5
            self.epsilon = 0.01
            self.buffer = SumTree(buffer_size)
        else:
            # Standard Buffer
            self.buffer = deque()
        self.count = 0
        self.with_per = with_per
        self.buffer_size = buffer_size

    def memorize(self, state, action, reward, done, new_state,
                 achieved_goal, goal, error=None):
        """ Save an experience to memory, optionally with its TD-Error """
        experience = (state, action, reward, done, new_state,
                      achieved_goal, goal, error)
        if self.with_per:
            priority = self.priority(error[0])
            self.buffer.add(priority, experience)
            self.count += 1
        else:
            # Check if buffer is already full
            if self.count < self.buffer_size:
                self.buffer.append(experience)
                self.count += 1
            else:
                self.buffer.popleft()
                self.buffer.append(experience)

    def priority(self, error):
        """ Compute an experience priority, as per Schaul et al. """
        return (error + self.epsilon) ** self.alpha

    def size(self):
        """ Current Buffer Occupation """
        return self.count

    def sample_batch(self, batch_size):
        """ Sample a batch, optionally with (PER) """
        batch = []

        # Sample using priorities
        if self.with_per:
            T = self.buffer.total() / batch_size
            for i in range(batch_size):
                a, b = T * i, T * (i + 1)
                s = random.uniform(a, b)
                idx, error, data = self.buffer.get(s)
                batch.append((*data, idx))
            # The tree index is the last element appended above: the stored
            # error sits at position 7 and the index at position 8.
            idx = np.array([i[8] for i in batch])
        # Sample randomly from Buffer
        elif self.count < batch_size:
            idx = None
            batch = random.sample(self.buffer, self.count)
        else:
            idx = None
            batch = random.sample(self.buffer, batch_size)

        # Return a batch of experience
        s_batch = np.array([i[0] for i in batch])
        a_batch = np.array([i[1] for i in batch])
        r_batch = np.array([i[2] for i in batch])
        d_batch = np.array([i[3] for i in batch])
        new_s_batch = np.array([i[4] for i in batch])
        ag_batch = np.array([i[5] for i in batch])
        g_batch = np.array([i[6] for i in batch])
        return (s_batch, a_batch, r_batch, d_batch, new_s_batch,
                ag_batch, g_batch, idx)

    def update(self, idx, new_error):
        """ Update priority for idx (PER) """
        self.buffer.update(idx, self.priority(new_error))

    def clear(self):
        """ Clear buffer / Sum Tree """
        if self.with_per:
            self.buffer = SumTree(self.buffer_size)
        else:
            self.buffer = deque()
        self.count = 0
from sumtree import SumTree

tree = SumTree(memory_size=10)
p = 1
for i in range(p):
    tree.add(10000, (1, 1, 1, 1, 1))

print("tree", tree.tree)
print("transition", tree.transitions)
class PriorityReplayBuffer(object):
    # TODO: reference https://github.com/rlcode/per/blob/master/prioritized_memory.py
    e = 0.01
    a = 0.6
    beta = 0.4
    beta_increment_per_sampling = 0.001
    absolute_error_upper = 1.

    def __init__(self, capacity):
        '''
        Initializes PRB.

        Args:
            capacity: capacity of the backing SumTree
        '''
        self.tree = SumTree(capacity)
        self.capacity = capacity

    def _get_priority(self, error):
        '''
        Gets the priority associated with the error.

        Args:
            error: input error
        Returns:
            the associated priority
        '''
        priority = np.abs(error) + self.e
        priority = np.minimum(priority, self.absolute_error_upper)
        return priority ** self.a

    def add(self, error, experience):
        '''
        Adds the experience and error to the SumTree.

        Args:
            error: TD error of the sample
            experience: experience to enter
        '''
        priority = self._get_priority(error)
        self.tree.add(experience, priority)

    def sample(self, size):
        '''
        Returns a sample of the given size following the weighted
        distribution.

        Args:
            size: the desired batch size to receive
        Returns:
            the batch of experiences, indexes, and importance-sampling weights
        '''
        batch = []
        idxs = []
        segment = self.tree.total_priority() / size
        priorities = []

        self.beta = np.min([1., self.beta + self.beta_increment_per_sampling])

        # p_min = np.min(self.tree.tree[-self.tree.capacity:])
        # max_weight = (p_min * size) ** (-self.beta)

        for i in range(size):
            a = segment * i
            b = segment * (i + 1)
            s = random.uniform(a, b)
            (idx, p, data) = self.tree.get_leaf(s)
            priorities.append(p)
            batch.append(data)
            idxs.append(idx)

        # np.array so the element-wise division is valid
        sampling_probabilities = np.array(priorities) / self.tree.total_priority()
        is_weight = np.power(self.tree.size * sampling_probabilities,
                             -self.beta)
        is_weight /= is_weight.max()

        return batch, idxs, is_weight

    def update(self, idx, error):
        '''
        Updates the tree index with the error.

        Args:
            idx: the SumTree index to update
            error: the error of the experience
        '''
        p = self._get_priority(error)
        self.tree.update(idx, p)
class Memory:   # stored as ( s, a, r, s_ ) in SumTree
    e = 0.01
    a = 0.6
    PER_e = 0.01   # Hyperparameter that we use to avoid some experiences having 0 probability of being taken
    PER_a = 0.6    # Hyperparameter that we use to trade off between taking only high-priority experiences and sampling randomly
    PER_b = 0.4    # importance-sampling, from initial value increasing to 1
    PER_b_increment_per_sampling = 0.001
    absolute_error_upper = 1.

    def __init__(self, capacity):
        self.tree = SumTree(capacity)

    def _getPriority(self, error):
        return (error + self.e) ** self.a

    def add(self, error, sample):
        max_priority = np.max(self.tree.tree[-self.tree.capacity:])
        if max_priority == 0:
            max_priority = self.absolute_error_upper
        self.tree.add(self._getPriority(max_priority), sample)

    def sample(self, n):
        batch = []

        # Calculate the priority segment: as explained in the paper, we
        # divide the range [0, p_total] into n sub-ranges
        segment = self.tree.total() / n

        # Increase PER_b each time we sample a new minibatch
        self.PER_b = np.min(
            [1., self.PER_b + self.PER_b_increment_per_sampling])  # max = 1

        # Calculating the max_weight
        p_min = np.min(
            self.tree.tree[-self.tree.capacity:]) / self.tree.total()
        max_weight = (p_min * n) ** (-self.PER_b)

        for i in range(n):
            a = segment * i
            b = segment * (i + 1)

            s = random.uniform(a, b)
            (idx, p, data) = self.tree.get(s)

            sampling_probabilities = p / self.tree.total()
            # IS = (1/N * 1/P(i))**b / max wi == (N*P(i))**-b / max wi
            is_weights = np.power(n * sampling_probabilities,
                                  -self.PER_b) / max_weight
            batch.append((idx, data, is_weights))

        return batch

    def update(self, idx, error):
        p = self._getPriority(error)
        self.tree.update(idx, p)
class PrioritizedReplayBuffer:
    e = 1e-5
    alpha = 0.6
    beta = 0.4
    beta_increment_per_sampling = 0.001

    def __init__(self, buffer_size, batch_size, seed):
        self.batch_size = batch_size
        self.experience = namedtuple(
            "Experience",
            field_names=["state", "action", "reward", "next_state", "done"])
        self.seed = random.seed(seed)
        self.tree = SumTree(buffer_size)

    def _get_priority(self, error):
        return (error + self.e) ** self.alpha

    def add(self, error, state, action, reward, next_state, done):
        sample = self.experience(state, action, reward, next_state, done)
        p = self._get_priority(error)
        self.tree.add(p, sample)

    def sample(self):
        experiences = []
        indices = []
        segment = self.tree.total() / self.batch_size
        priorities = []

        self.beta = np.min([1., self.beta + self.beta_increment_per_sampling])

        for i in range(self.batch_size):
            a = segment * i
            b = segment * (i + 1)
            s = random.uniform(a, b)
            (idx, p, data) = self.tree.get(s)
            if len(data) < 5:
                raise ValueError("missed data")
            priorities.append(p)
            experiences.append(data)
            indices.append(idx)

        # np.array so the element-wise division is valid
        sampling_probabilities = np.array(priorities) / self.tree.total()
        is_weights = np.power(self.tree.n_entries * sampling_probabilities,
                              -self.beta)
        is_weights /= is_weights.max()
        is_weights = torch.from_numpy(
            np.vstack([w for w in is_weights if w is not None])).float().to(device)

        states = torch.from_numpy(
            np.vstack([e.state for e in experiences if e is not None])).float().to(device)
        actions = torch.from_numpy(
            np.vstack([e.action for e in experiences if e is not None])).long().to(device)
        rewards = torch.from_numpy(
            np.vstack([e.reward for e in experiences if e is not None])).float().to(device)
        next_states = torch.from_numpy(
            np.vstack([e.next_state for e in experiences if e is not None])).float().to(device)
        dones = torch.from_numpy(
            np.vstack([e.done for e in experiences
                       if e is not None]).astype(np.uint8)).float().to(device)

        return (indices, is_weights, states, actions, rewards, next_states, dones)

    def update(self, idx, error):
        p = self._get_priority(error)
        self.tree.update(idx, p)
class MemoryBuffer(object):
    """ Memory Buffer Helper class for Experience Replay
    using a double-ended queue or a Sum Tree (for PER)
    """

    def __init__(self, buffer_size, with_per=False):
        """ Initialization """
        if with_per:
            # Prioritized Experience Replay
            self.alpha = 0.5
            self.epsilon = 0.01
            self.buffer = SumTree(buffer_size)
        else:
            # Standard Buffer
            self.buffer = deque()
        self.count = 0
        self.with_per = with_per
        self.buffer_size = buffer_size

    def memorize(self, state, action, reward, done, new_state, error):
        """ Save an experience to memory, optionally with its TD-Error """
        experience = (state, action, reward, done, new_state)
        if self.with_per:
            priority = self.priority(error[0])
            self.buffer.add(priority, experience)
            self.count += 1

    def priority(self, error):
        """ Compute an experience priority, as per Schaul et al. """
        return (error + self.epsilon) ** self.alpha

    def size(self):
        """ Current Buffer Occupation """
        return self.count

    def sample_batch(self, batch_size):
        """ Sample a batch, optionally with (PER) """
        s_batch = []
        a_batch = []
        r_batch = []
        d_batch = []
        new_s_batch = []
        idx_ = []

        # Sample using priorities
        if self.with_per:
            T = self.buffer.total() / batch_size
            for i in range(batch_size):
                a, b = T * i, T * (i + 1)
                s = random.uniform(a, b)
                idx, error, data = self.buffer.get(s)
                s_batch.append(data[0])
                a_batch.append(data[1])
                r_batch.append(data[2])
                d_batch.append(data[3])
                new_s_batch.append(data[4])
                idx_.append(idx)

        return s_batch, a_batch, r_batch, d_batch, new_s_batch, idx_

    def update(self, idx, new_error):
        """ Update priority for idx (PER) """
        self.buffer.update(idx, self.priority(new_error))

    def clear(self):
        """ Clear buffer / Sum Tree """
        if self.with_per:
            self.buffer = SumTree(self.buffer_size)
        else:
            self.buffer = deque()
        self.count = 0
def train(self):
    if self.params == 1:
        top_performers = []
        gen_performance = []
        for gen in range(self.generations):
            performance = SumTree(self.size)
            hp1 = np.random.randint(
                low=self.param_one[0],
                high=self.param_one[1],
                size=(self.size - len(top_performers)) * 2)
            # two alleles per parameter, so rows are 2 wide
            # (reshape mirrors the other branches)
            hps = np.append(
                np.array(top_performers).reshape((-1, 2)),
                hp1.reshape((-1, 2))).reshape((-1, 2))
            for hp in hps:  # train all models and save performance
                print(hp)
                temp = self.model(self.x_train, self.y_train)
                temp.build(np.mean(np.array([hp[0], hp[1]])))
                temp.train(self.epochs)
                metric = temp.evaluate(self.x_test, self.y_test)
                performance.add(metric, np.array([hp]))
            # array of the highest performing metrics
            keep_metrics = np.sort(performance.p_array())[-int(self.keep):]
            # array to store the best n=self.keep performing hyperparameters
            hyperparameters = []
            for metric in keep_metrics:
                # note that keep_metrics is ordered lowest to highest performance
                _, __, hp_temp = performance.get(metric)
                hyperparameters = np.append(np.array(hyperparameters), hp_temp)
            # rows are 2 wide here (one parameter, two alleles)
            hyperparameters = hyperparameters.reshape((-1, 2))
            mated_hp1 = []
            for mate in hyperparameters:
                # mating routine with Mendelian inheritance from the two alleles
                mated_hp1.append(np.random.choice(mate[np.array([0, 1])]))
                mated_hp1.append(
                    np.random.choice(hyperparameters[np.random.randint(
                        len(hyperparameters))]))
            top_performers = np.array(mated_hp1).reshape((-1, 2))
            print("generation:", gen,
                  " min performance (params, metric):",
                  hyperparameters[0], keep_metrics[0],
                  " max performance:",
                  hyperparameters[-1], keep_metrics[-1])
            gen_performance.append(keep_metrics[0])
            gen_performance.append(keep_metrics[-1])
        self.hyperparameters = hyperparameters
        self.keep_metrics = keep_metrics
        return (hyperparameters, keep_metrics,
                np.array(gen_performance).reshape(-1, 2))

    if self.params == 2:
        top_performers = []
        gen_performance = []
        os.mkdir("temp")
        for gen in range(self.generations):
            performance = SumTree(self.size)
            hp1 = np.random.randint(
                low=self.param_one[0],
                high=self.param_one[1],
                size=(self.size - len(top_performers)) * 2)
            hp2 = np.random.randint(
                low=self.param_two[0],
                high=self.param_two[1],
                size=(self.size - len(top_performers)) * 2)
            hps = np.append(
                np.array(top_performers).reshape((-1, 4)),
                np.dstack((hp1, hp2))).reshape((-1, 4))
            for hp in hps:  # train all models and save performance
                print(hp)
                temp = self.model(self.x_train, self.y_train)
                temp.build(np.mean(np.array([hp[0], hp[2]])),
                           np.mean(np.array([hp[1], hp[3]])))
                temp.train(self.epochs)
                metric = temp.evaluate(self.x_test, self.y_test)
                performance.add(metric, np.array([hp]))
            # array of the highest performing metrics
            keep_metrics = np.sort(performance.p_array())[-int(self.keep):]
            # array to store the best n=self.keep performing hyperparameters
            hyperparameters = []
            for metric in keep_metrics:
                # note that keep_metrics is ordered lowest to highest performance
                _, __, hp_temp = performance.get(metric)
                hyperparameters = np.append(np.array(hyperparameters), hp_temp)
            hyperparameters = hyperparameters.reshape((-1, 4))
            mated_hp1 = []
            mated_hp2 = []
            for mate in hyperparameters:
                # mating routine with Mendelian inheritance from the two alleles
                mated_hp1.append(np.random.choice(mate[np.array([0, 2])]))
                mated_hp1.append(
                    np.random.choice(hyperparameters[np.random.randint(
                        len(hyperparameters))][np.array([0, 2])]))
                mated_hp2.append(np.random.choice(mate[np.array([1, 3])]))
                mated_hp2.append(
                    np.random.choice(hyperparameters[np.random.randint(
                        len(hyperparameters))][np.array([1, 3])]))
            top_performers = np.dstack(
                (np.array(mated_hp1), np.array(mated_hp2))).reshape((-1, 4))
            print("generation:", gen,
                  " min performance (params, metric):",
                  hyperparameters[0], keep_metrics[0],
                  " max performance:",
                  hyperparameters[-1], keep_metrics[-1])
            gen_performance.append([keep_metrics[0], keep_metrics[-1]])
            # checkpoint the per-generation extremes; the temp dir is
            # cleaned up once the loop is done
            np.savetxt("temp/gen_perf.csv",
                       np.array(gen_performance).reshape(-1, 2),
                       delimiter=",")
        os.remove("temp/gen_perf.csv")
        os.rmdir("temp")
        self.hyperparameters = hyperparameters
        self.keep_metrics = keep_metrics
        return (hyperparameters, keep_metrics,
                np.array(gen_performance).reshape(-1, 2))

    if self.params == 3:
        top_performers = []
        gen_performance = []
        for gen in range(self.generations):
            performance = SumTree(self.size)
            hp1 = np.random.randint(
                low=self.param_one[0],
                high=self.param_one[1],
                size=(self.size - len(top_performers)) * 2)
            hp2 = np.random.randint(
                low=self.param_two[0],
                high=self.param_two[1],
                size=(self.size - len(top_performers)) * 2)
            hp3 = np.random.randint(
                low=self.param_three[0],
                high=self.param_three[1],
                size=(self.size - len(top_performers)) * 2)
            hps = np.append(
                np.array(top_performers).reshape((-1, 6)),
                np.dstack((hp1, hp2, hp3))).reshape((-1, 6))
            for hp in hps:  # train all models and save performance
                print(hp)
                temp = self.model(self.x_train, self.y_train)
                temp.build(np.mean(np.array([hp[0], hp[3]])),
                           np.mean(np.array([hp[1], hp[4]])),
                           np.mean(np.array([hp[2], hp[5]])))
                temp.train(self.epochs)
                metric = temp.evaluate(self.x_test, self.y_test)
                performance.add(metric, np.array([hp]))
            # array of the highest performing metrics
            keep_metrics = np.sort(performance.p_array())[-int(self.keep):]
            # array to store the best n=self.keep performing hyperparameters
            hyperparameters = []
            for metric in keep_metrics:
                # note that keep_metrics is ordered lowest to highest performance
                _, __, hp_temp = performance.get(metric)
                hyperparameters = np.append(np.array(hyperparameters), hp_temp)
            hyperparameters = hyperparameters.reshape((-1, 6))
            mated_hp1 = []
            mated_hp2 = []
            mated_hp3 = []
            for mate in hyperparameters:
                # mating routine with Mendelian inheritance from the two alleles
                mated_hp1.append(np.random.choice(mate[np.array([0, 3])]))
                mated_hp1.append(
                    np.random.choice(hyperparameters[np.random.randint(
                        len(hyperparameters))][np.array([0, 3])]))
                mated_hp2.append(np.random.choice(mate[np.array([1, 4])]))
                mated_hp2.append(
                    np.random.choice(hyperparameters[np.random.randint(
                        len(hyperparameters))][np.array([1, 4])]))
                mated_hp3.append(np.random.choice(mate[np.array([2, 5])]))
                mated_hp3.append(
                    np.random.choice(hyperparameters[np.random.randint(
                        len(hyperparameters))][np.array([2, 5])]))
            top_performers = np.dstack(
                (np.array(mated_hp1), np.array(mated_hp2),
                 np.array(mated_hp3))).reshape((-1, 6))
            print("generation:", gen,
                  " min performance (params, metric):",
                  hyperparameters[0], keep_metrics[0],
                  " max performance:",
                  hyperparameters[-1], keep_metrics[-1])
            gen_performance.append([keep_metrics[0], keep_metrics[-1]])
        self.hyperparameters = hyperparameters
        self.keep_metrics = keep_metrics
        return (hyperparameters, keep_metrics,
                np.array(gen_performance).reshape(-1, 2))

    if self.params == 4:
        top_performers = []
        gen_performance = []
        for gen in range(self.generations):
            performance = SumTree(self.size)
            hp1 = np.random.randint(
                low=self.param_one[0],
                high=self.param_one[1],
                size=(self.size - len(top_performers)) * 2)
            hp2 = np.random.randint(
                low=self.param_two[0],
                high=self.param_two[1],
                size=(self.size - len(top_performers)) * 2)
            hp3 = np.random.randint(
                low=self.param_three[0],
                high=self.param_three[1],
                size=(self.size - len(top_performers)) * 2)
            hp4 = np.random.randint(
                low=self.param_four[0],
                high=self.param_four[1],
                size=(self.size - len(top_performers)) * 2)
            hps = np.append(
                np.array(top_performers).reshape((-1, 8)),
                np.dstack((hp1, hp2, hp3, hp4))).reshape((-1, 8))
            for hp in hps:  # train all models and save performance
                print(hp)
                temp = self.model(self.x_train, self.y_train)
                temp.build(np.mean(np.array([hp[0], hp[4]])),
                           np.mean(np.array([hp[1], hp[5]])),
                           np.mean(np.array([hp[2], hp[6]])),
                           np.mean(np.array([hp[3], hp[7]])))
                temp.train(self.epochs)
                metric = temp.evaluate(self.x_test, self.y_test)
                performance.add(metric, np.array([hp]))
            # array of the highest performing metrics
            keep_metrics = np.sort(performance.p_array())[-int(self.keep):]
            # array to store the best n=self.keep performing hyperparameters
            hyperparameters = []
            for metric in keep_metrics:
                # note that keep_metrics is ordered lowest to highest performance
                _, __, hp_temp = performance.get(metric)
                hyperparameters = np.append(np.array(hyperparameters), hp_temp)
            hyperparameters = hyperparameters.reshape((-1, 8))
            mated_hp1 = []
            mated_hp2 = []
            mated_hp3 = []
            mated_hp4 = []
            for mate in hyperparameters:
                # mating routine with Mendelian inheritance from the two alleles
                mated_hp1.append(np.random.choice(mate[np.array([0, 4])]))
                mated_hp1.append(
                    np.random.choice(hyperparameters[np.random.randint(
                        len(hyperparameters))][np.array([0, 4])]))
                mated_hp2.append(np.random.choice(mate[np.array([1, 5])]))
                mated_hp2.append(
                    np.random.choice(hyperparameters[np.random.randint(
                        len(hyperparameters))][np.array([1, 5])]))
                mated_hp3.append(np.random.choice(mate[np.array([2, 6])]))
                mated_hp3.append(
                    np.random.choice(hyperparameters[np.random.randint(
                        len(hyperparameters))][np.array([2, 6])]))
                mated_hp4.append(np.random.choice(mate[np.array([3, 7])]))
                mated_hp4.append(
                    np.random.choice(hyperparameters[np.random.randint(
                        len(hyperparameters))][np.array([3, 7])]))
            top_performers = np.dstack(
                (np.array(mated_hp1), np.array(mated_hp2),
                 np.array(mated_hp3), np.array(mated_hp4))).reshape((-1, 8))
            print("generation:", gen,
                  " min performance (params, metric):",
                  hyperparameters[0], keep_metrics[0],
                  " max performance:",
                  hyperparameters[-1], keep_metrics[-1])
            gen_performance.append([keep_metrics[0], keep_metrics[-1]])
        self.hyperparameters = hyperparameters
        self.keep_metrics = keep_metrics
        return (hyperparameters, keep_metrics,
                np.array(gen_performance).reshape(-1, 2))

    if self.params == 5:
        top_performers = []
        gen_performance = []
        for gen in range(self.generations):
            performance = SumTree(self.size)
            hp1 = np.random.randint(
                low=self.param_one[0],
                high=self.param_one[1],
                size=(self.size - len(top_performers)) * 2)
            hp2 = np.random.randint(
                low=self.param_two[0],
                high=self.param_two[1],
                size=(self.size - len(top_performers)) * 2)
            hp3 = np.random.randint(
                low=self.param_three[0],
                high=self.param_three[1],
                size=(self.size - len(top_performers)) * 2)
            hp4 = np.random.randint(
                low=self.param_four[0],
                high=self.param_four[1],
                size=(self.size - len(top_performers)) * 2)
            hp5 = np.random.randint(
                low=self.param_five[0],
                high=self.param_five[1],
                size=(self.size - len(top_performers)) * 2)
            hps = np.append(
                np.array(top_performers).reshape((-1, 10)),
                np.dstack((hp1, hp2, hp3, hp4, hp5))).reshape((-1, 10))
            for hp in hps:  # train all models and save performance
                print(hp)
                temp = self.model(self.x_train, self.y_train)
                temp.build(np.mean(np.array([hp[0], hp[5]])),
                           np.mean(np.array([hp[1], hp[6]])),
                           np.mean(np.array([hp[2], hp[7]])),
                           np.mean(np.array([hp[3], hp[8]])),
                           np.mean(np.array([hp[4], hp[9]])))
                temp.train(self.epochs)
                metric = temp.evaluate(self.x_test, self.y_test)
                performance.add(metric, np.array([hp]))
            # array of the highest performing metrics
            keep_metrics = np.sort(performance.p_array())[-int(self.keep):]
            # array to store the best n=self.keep performing hyperparameters
            hyperparameters = []
            for metric in keep_metrics:
                # note that keep_metrics is ordered lowest to highest performance
                _, __, hp_temp = performance.get(metric)
                hyperparameters = np.append(np.array(hyperparameters), hp_temp)
            hyperparameters = hyperparameters.reshape((-1, 10))
            mated_hp1 = []
            mated_hp2 = []
            mated_hp3 = []
            mated_hp4 = []
            mated_hp5 = []
            for mate in hyperparameters:
                # mating routine with Mendelian inheritance from the two alleles
                mated_hp1.append(np.random.choice(mate[np.array([0, 5])]))
                mated_hp1.append(
                    np.random.choice(hyperparameters[np.random.randint(
                        len(hyperparameters))][np.array([0, 5])]))
                mated_hp2.append(np.random.choice(mate[np.array([1, 6])]))
                mated_hp2.append(
                    np.random.choice(hyperparameters[np.random.randint(
                        len(hyperparameters))][np.array([1, 6])]))
                mated_hp3.append(np.random.choice(mate[np.array([2, 7])]))
                mated_hp3.append(
                    np.random.choice(hyperparameters[np.random.randint(
                        len(hyperparameters))][np.array([2, 7])]))
                mated_hp4.append(np.random.choice(mate[np.array([3, 8])]))
                mated_hp4.append(
                    np.random.choice(hyperparameters[np.random.randint(
                        len(hyperparameters))][np.array([3, 8])]))
                mated_hp5.append(np.random.choice(mate[np.array([4, 9])]))
                mated_hp5.append(
                    np.random.choice(hyperparameters[np.random.randint(
                        len(hyperparameters))][np.array([4, 9])]))
            top_performers = np.dstack(
                (np.array(mated_hp1), np.array(mated_hp2),
                 np.array(mated_hp3), np.array(mated_hp4),
                 np.array(mated_hp5))).reshape((-1, 10))
            print("generation:", gen,
                  " min performance (params, metric):",
                  hyperparameters[0], keep_metrics[0],
                  " max performance:",
                  hyperparameters[-1], keep_metrics[-1])
            gen_performance.append([keep_metrics[0], keep_metrics[-1]])
        self.hyperparameters = hyperparameters
        self.keep_metrics = keep_metrics
        return (hyperparameters, keep_metrics,
                np.array(gen_performance).reshape(-1, 2))
class PrioritizedBuffer:
    experience = namedtuple(
        "Experience",
        field_names=["index", "IS_weight", "state", "action", "reward",
                     "next_state", "done"])
    alpha = 0.6     # mixing pure greedy prioritization and uniform random sampling
    beta = 0.4      # compensate for the non-uniform probabilities
    beta_increment_per_sampling = 0.001
    epsilon = 0.01  # small amount to avoid zero priority
    current_length = 0

    def __init__(self, size=int(1e5), batch_size=64, seed=1234):
        self.size = size
        self.batch_size = batch_size
        self.seed = random.seed(seed)
        self.memory = SumTree(capacity=self.size)

    def push(self, state, action, reward, next_state, done):
        """Push new experience(s) to memory"""
        max_p = self.memory.tree[-self.memory.capacity:].max()
        priority = 1.0 if self.current_length == 0 else max_p
        data = (state, action, reward, next_state, done)
        self.memory.add(priority, data)
        self.current_length = self.current_length + 1

    def sample(self):
        sum_priority = self.memory.total()
        segment = sum_priority / self.batch_size

        samples = []
        for i in range(self.batch_size):
            a, b = segment * i, segment * (i + 1)
            s = random.uniform(a, b)
            (idx, priority, data) = self.memory.get(s)
            p = priority / sum_priority
            IS_weight = (self.batch_size * p) ** (-self.beta)
            samples.append(self.experience(idx, IS_weight, data[0], data[1],
                                           data[2], data[3], data[4]))
        self.beta = np.min([1., self.beta + self.beta_increment_per_sampling])  # max = 1

        batch = self.experience(*zip(*samples))
        index = np.asarray(batch.index)
        IS_weight = np.asarray(batch.IS_weight)
        max_weight = IS_weight.max()
        IS_weight = IS_weight / max_weight
        states = np.asarray(batch.state)
        actions = np.asarray(batch.action)
        rewards = np.asarray(batch.reward)
        next_states = np.asarray(batch.next_state)
        dones = np.asarray(batch.done).astype(np.uint8)

        return (index, IS_weight, states, actions, rewards, next_states, dones)

    def update_priority(self, idxs, td_errors):
        """Update priorities for the replayed transitions"""
        for idx, td_error in zip(idxs, td_errors):
            priority = (td_error + self.epsilon) ** self.alpha
            self.memory.update(idx, priority)

    def __len__(self):
        return self.current_length
class ReplayMemory(object):   # stored as ( s, a, r, s_ ) in SumTree
    """
    This SumTree code is a modified version; the original code is from:
    https://github.com/jaara/AI-blog/blob/master/Seaquest-DDQN-PER.py
    """
    PER_e = 0.01   # Hyperparameter that we use to avoid some experiences having 0 probability of being taken
    PER_a = 0.6    # Hyperparameter that we use to trade off between taking only high-priority experiences and sampling randomly
    PER_b = 0.4    # importance-sampling, from initial value increasing to 1
    PER_b_increment_per_sampling = 0.001

    absolute_error_upper = 1.  # clipped abs error

    def __init__(self, capacity):
        # Making the tree
        """
        Remember that our tree is composed of a sum tree that contains the
        priority scores at its leaves, plus a data array.

        We don't use a deque because that would mean that at each timestep
        our experiences change index by one. We prefer to use a simple array
        and overwrite entries when the memory is full.
        """
        self.tree = SumTree(capacity)

    def store(self, experience):
        """
        Store a new experience in our tree.
        Each new experience gets a score of max_priority (it will then be
        improved when we use this experience to train our DDQN).
        """
        # Find the max priority
        max_priority = np.max(self.tree.tree[-self.tree.capacity:])

        # If the max priority = 0, we can't leave priority = 0 since that
        # experience would never have a chance of being selected,
        # so we use a minimum priority
        if max_priority == 0:
            max_priority = self.absolute_error_upper

        self.tree.add(max_priority, experience)  # set the max p for new p

    def sample(self, n):
        """
        - First, to sample a minibatch of size k, the range [0, priority_total]
          is divided into k ranges.
        - Then a value is uniformly sampled from each range.
        - The experiences whose priority scores correspond to the sampled
          values are retrieved from the sum tree.
        - Finally, we calculate the IS weights for each minibatch element.
        """
        # Create a sample array that will contain the minibatch
        memory_b = []

        b_idx, b_ISWeights = np.empty((n,), dtype=np.int32), np.empty(
            (n, 1), dtype=np.float32)

        # Calculate the priority segment
        # Here, as explained in the paper, we divide the range [0, p_total] into n ranges
        priority_segment = self.tree.total_priority / n  # priority segment

        # Here we increase PER_b each time we sample a new minibatch
        self.PER_b = np.min(
            [1., self.PER_b + self.PER_b_increment_per_sampling])  # max = 1

        # Calculating the max_weight
        p_min = np.min(
            self.tree.tree[-self.tree.capacity:]) / self.tree.total_priority
        max_weight = (p_min * n) ** (-self.PER_b)

        for i in range(n):
            # A value is uniformly sampled from each range
            a, b = priority_segment * i, priority_segment * (i + 1)
            value = np.random.uniform(a, b)

            # The experience that corresponds to each value is retrieved
            index, priority, data = self.tree.get_leaf(value)

            # P(j)
            sampling_probabilities = priority / self.tree.total_priority

            # IS = (1/N * 1/P(i))**b / max wi == (N*P(i))**-b / max wi
            b_ISWeights[i, 0] = np.power(n * sampling_probabilities,
                                         -self.PER_b) / max_weight

            b_idx[i] = index
            experience = [data]
            memory_b.append(experience)

        return b_idx, memory_b, b_ISWeights

    def batch_update(self, tree_idx, abs_errors):
        """Update the priorities on the tree"""
        abs_errors += self.PER_e  # convert to abs and avoid 0
        clipped_errors = np.minimum(abs_errors, self.absolute_error_upper)
        ps = np.power(np.absolute(clipped_errors), self.PER_a)

        for ti, p in zip(tree_idx, ps):
            self.tree.update(ti, p)

    def __len__(self):
        return np.sum(self.tree.data != 0)
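# ---------------------------------------------------------------------------
# Several buffers above return importance-sampling weights with the batch
# (b_ISWeights here; is_weight/is_weights/ISWeights elsewhere). Per Schaul
# et al., they scale each sample's loss before the gradient step, and the
# resulting |TD errors| go back through update()/batch_update(). A minimal
# PyTorch sketch: q_values and targets are random stand-ins for the online
# network's predictions and the bootstrapped targets, not any snippet's API.
# ---------------------------------------------------------------------------
import torch

q_values = torch.randn(32, requires_grad=True)  # hypothetical Q(s, a) batch
targets = torch.randn(32)                       # hypothetical r + gamma * max Q'
is_weights = torch.rand(32)                     # as returned by sample(n)

td_errors = targets - q_values
loss = (is_weights * td_errors.pow(2)).mean()   # IS-weighted MSE
loss.backward()

# the absolute TD errors are what the buffers expect back as new priorities
new_abs_errors = td_errors.abs().detach().numpy()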