def add_base_feature(self, center, dim, Q):
    """
    Register a new feature defined on the single dimension ``dim`` and
    return its index.

    Every already-known feature whose ``dim`` list does not contain ``dim``
    is paired with the new feature as a refinement candidate, and the union
    of both features' base ids is recorded in ``self.base_id_sets``.

    :param center: center handed to ``KernelizedFeature``.
    :param dim: the one dimension the new feature is defined on.
    :param Q: forwarded to ``add_new_features`` as its second argument, but
        only when ``self.normalization`` is truthy; ignored otherwise.
    :return: index of the newly added feature.
    """
    new_index = self.features_num
    feature = KernelizedFeature(
        center=center,
        dim=[dim],
        kernel_args=self.kernel_args,
        kernel=self.kernel,
        index=new_index,
    )
    self.features.append(feature)
    self.base_id_sets.add(feature.base_ids)
    self.sorted_ids.push(-1, new_index)
    self.logger.debug("Added Feature {} {}".format(new_index, feature))

    # Only features that do not already cover `dim` may be combined with
    # the new one.
    partners = [i for i in range(new_index) if dim not in self.features[i].dim]
    fresh_candidates = {}
    for i in partners:
        fresh_candidates[(i, new_index)] = Candidate(i, new_index)
    self.candidates.update(fresh_candidates)
    for i in partners:
        self.base_id_sets.add(feature.base_ids | self.features[i].base_ids)

    self.features_num += 1

    # Grow the weight by one feature; Q is only supplied under normalization.
    extra_args = (Q,) if self.normalization else ()
    self.weight = add_new_features(self.weight, *extra_args)
    return new_index
def add_refined_feature(self, index1, index2, Q):
    """
    Combine two existing features into a new one and return its index.

    The new feature covers the union of the parents' dimensions. Its center
    is, per dimension, the mean of the parents' center values over the
    parents that cover that dimension (dimensions covered by neither parent
    stay at zero).

    :param index1: index of the first parent feature.
    :param index2: index of the second parent feature.
    :param Q: forwarded to ``add_new_features`` as its second argument, but
        only when ``self.normalization`` is truthy; ignored otherwise.
    :return: index of the newly added feature.
    :raises KeyError: if ``(index1, index2)`` is not a key of
        ``self.candidates`` (from the ``del`` below).
    """
    first = self.features[index1]
    second = self.features[index2]
    new_index = self.features_num

    # Accumulate the center values and how many parents cover each dimension.
    center_sum = np.zeros_like(first.center)
    coverage = np.zeros_like(first.center)
    for parent in (first, second):
        coverage[parent.dim] += 1
        center_sum[parent.dim] += parent.center[parent.dim]
    # Uncovered dimensions would divide by zero; dividing by one keeps them 0.
    coverage[coverage == 0] = 1.0
    center_sum /= coverage

    new_dim = sorted(frozenset(first.dim) | frozenset(second.dim))
    new_base_ids = first.base_ids | second.base_ids

    feature = KernelizedFeature(
        center=center_sum,
        dim=new_dim,
        kernel_args=self.kernel_args,
        kernel=self.kernel,
        index=new_index,
        base_ids=new_base_ids,
    )
    self.features.append(feature)
    # The more base ids a feature has, the lower (more negative) its priority
    # value.
    self.sorted_ids.push(-len(feature.base_ids), new_index)
    self.base_id_sets.add(feature.base_ids)
    del self.candidates[(index1, index2)]

    # Collect ALL new candidates before touching base_id_sets: the filter
    # below reads base_id_sets, so registering unions while still filtering
    # would change which candidates are produced.
    new_dim_set = frozenset(new_dim)
    fresh_candidates = {}
    for other in range(new_index):
        other_feature = self.features[other]
        if (other_feature.base_ids | new_base_ids) in self.base_id_sets:
            continue  # this combination of base ids is already known
        if not new_dim_set.isdisjoint(other_feature.dim):
            continue  # overlapping dimensions cannot be combined
        fresh_candidates[(other, new_index)] = Candidate(other, new_index)

    for other, _ in list(fresh_candidates):
        self.base_id_sets.add(new_base_ids | self.features[other].base_ids)
    self.candidates.update(fresh_candidates)

    self.logger.debug("Added refined feature {} {}".format(new_index, feature))
    self.logger.debug("{} candidates".format(len(self.candidates)))

    self.features_num += 1

    # Grow the weight by one feature; Q is only supplied under normalization.
    extra_args = (Q,) if self.normalization else ()
    self.weight = add_new_features(self.weight, *extra_args)
    return new_index
def _expand_vectors(self, num_expansions):
    """
    Correct the size of the GQ weight and the eligibility traces after the
    representation has been expanded with new features.

    :param num_expansions: number of features that were added; each vector
        is padded with zeros for that many new features.
    """
    num_actions = self.representation.num_actions
    new_elem = np.zeros((num_actions, num_expansions))
    self.gqweight = add_new_features(self.gqweight, new_elem)
    if self.lambda_:
        # Pad the eligibility traces with zeros for the new features.
        # The helper is called as (array, new_elements) everywhere else in
        # this file; the previous call here passed num_actions as an extra
        # positional argument in between, which did not match that usage.
        # NOTE(review): assumes eligibility_trace has the same
        # (num_actions, n_features) layout as gqweight -- confirm.
        self.eligibility_trace = add_new_features(self.eligibility_trace, new_elem)
        self.eligibility_trace_s = add_new_features(
            self.eligibility_trace_s, np.zeros((1, num_expansions))
        )
def learn(self, s, p_actions, a, r, ns, np_actions, na, terminal):
    """
    Perform one temporal-difference update from a single transition.

    :param s: state the action was taken in.
    :param p_actions: not used by this method.
    :param a: action taken in ``s``.
    :param r: reward received.
    :param ns: next state.
    :param np_actions: passed on to ``self._future_action`` together with
        ``ns``.
    :param na: next action; replaced by whatever ``self._future_action``
        returns before it is used.
    :param terminal: whether ``ns`` is terminal; if truthy,
        ``self.episode_terminated()`` is called at the end.
    """
    # `s` cannot have been terminal, otherwise the episode would already
    # have ended before this transition.
    prev_terminal = False
    self.representation.pre_discover(s, prev_terminal, a, ns, terminal)
    gamma = self.discount_factor

    phi_s = self.representation.phi(s, prev_terminal)
    phi = self.representation.phi_sa(s, prev_terminal, a, phi_s)
    phi_prime_s = self.representation.phi(ns, terminal)
    # The choice of the next action is where SARSA and Q-Learning differ.
    na = self._future_action(ns, terminal, np_actions, phi_prime_s, na)
    phi_prime = self.representation.phi_sa(ns, terminal, na, phi_prime_s)
    active_count = count_nonzero(phi_s)  # number of non-zero state features

    # Eligibility traces.
    if not (self.lambda_ > 0):
        self.eligibility_trace = phi
    else:
        num_actions = self.representation.num_actions
        num_new = (phi.shape[0] - self.eligibility_trace.shape[0]) // num_actions
        if num_new > 0:
            # The representation grew since the last update: pad the trace
            # with zeros for the new features of every action.
            padded = add_new_features(
                self.eligibility_trace.reshape(num_actions, -1),
                np.zeros((num_actions, num_new)),
            )
            self.eligibility_trace = padded.flatten()
        self.eligibility_trace *= gamma * self.lambda_
        self.eligibility_trace += phi
        # Cap the trace at 1.
        self.eligibility_trace[self.eligibility_trace > 1] = 1

    td_error = r + np.dot(gamma * phi_prime - phi, self.representation.weight_vec)

    if active_count > 0:
        self.updateLearnRate(
            phi, phi_prime, self.eligibility_trace, gamma, active_count, terminal
        )
        # Keep a copy so a diverged update can be rolled back.
        backup = self.representation.weight.copy()
        self.representation.weight_vec += (
            self.learn_rate
            * self.representation.feature_learning_rate()
            * td_error
            * self.eligibility_trace
        )
        diverged = not np.all(np.isfinite(self.representation.weight_vec))
        if diverged:
            self.representation.weight = backup
            import warnings

            warnings.warn(
                "WARNING: TD-Learning diverged, weight_vec reached infinity!"
            )

    # Give the representation the chance to discover new features.
    self.representation.post_discover(s, prev_terminal, a, td_error, phi_s)

    if terminal:
        # `ns` is terminal: the episode ends here.
        self.episode_terminated()
def updateWeight(self, p1_index, p2_index):
    """
    Extend ``self.weight`` with an entry for a newly added feature, for all
    actions.

    With ``self.sparsify`` set, the new entry starts as the sum of the two
    parents' weights. Otherwise ``None`` is handed to ``add_new_features``,
    which is documented in this file as producing a zero weight.

    :param p1_index: column index of the first parent feature.
    :param p2_index: column index of the second parent feature.
    """
    initial_value = (
        self.weight[:, p1_index] + self.weight[:, p2_index]
        if self.sparsify
        else None
    )
    self.weight = add_new_features(self.weight, initial_value)
    # The feature function has changed, so the hashed phi must not be reused.
    self.hashed_s = None
def add_new_weight(self):
    """
    Extend ``self.weight`` for a newly added feature.

    No initial value is passed to ``add_new_features``; the new entry is
    meant to be a zero weight for every action.
    """
    grown_weight = add_new_features(self.weight)
    self.weight = grown_weight