Example #1
0
    def add_base_feature(self, center, dim, Q):
        """
        Create a kernel feature over the single dimension ``dim`` and
        register it with the representation.

        The feature is appended to ``self.features``, pushed onto
        ``self.sorted_ids`` with priority -1, and paired as a candidate with
        every existing feature whose ``dim`` does not contain ``dim``. The
        weight vector is then extended through
        ``addNewElementForAllActions``; ``Q`` is forwarded to that helper
        only when ``self.normalization`` is truthy.

        Returns the index of the newly added feature.
        """
        new_index = self.features_num
        feature = KernelizedFeature(center=center,
                                    dim=[dim],
                                    kernel_args=self.kernel_args,
                                    kernel=self.kernel,
                                    index=new_index)
        self.features.append(feature)

        self.base_id_sets.add(feature.base_ids)
        self.sorted_ids.push(-1, new_index)
        self.logger.debug("Added Feature {} {}".format(new_index, feature))

        # Older features that do not already cover `dim` can be combined
        # with the new one later on.
        partners = [i for i in range(new_index)
                    if dim not in self.features[i].dim]
        self.candidates.update(
            {(i, new_index): Candidate(i, new_index) for i in partners})
        for i in partners:
            # Remember the combined base-id set of every registered pair.
            self.base_id_sets.add(feature.base_ids | self.features[i].base_ids)
        self.features_num += 1

        # Extend the weight vector; Q is only passed along when normalizing.
        extra_args = (Q,) if self.normalization else ()
        self.weight_vec = addNewElementForAllActions(
            self.weight_vec, self.domain.actions_num, *extra_args)
        return new_index
Example #2
0
    def add_refined_feature(self, index1, index2, Q):
        """
        Combine the features at ``index1`` and ``index2`` into a new feature
        and register it with the representation.

        The new feature spans the union of the parents' dimensions. On each
        covered dimension its center is the parents' center value, averaged
        where both parents cover that dimension. The consumed candidate
        ``(index1, index2)`` is deleted, and the new feature is paired as a
        candidate with every existing feature that shares no dimension with
        it and whose combined base ids are not yet in ``self.base_id_sets``.
        ``Q`` is forwarded to ``addNewElementForAllActions`` only when
        ``self.normalization`` is truthy.

        Returns the index of the newly added feature.
        """
        first, second = self.features[index1], self.features[index2]
        new_index = self.features_num

        # Accumulate the parents' centers and count, per dimension, how many
        # parents contributed so the sum can be turned into a mean.
        center = np.zeros_like(first.center)
        coverage = np.zeros_like(first.center)
        for parent in (first, second):
            coverage[parent.dim] += 1
            center[parent.dim] += parent.center[parent.dim]
        # Dimensions covered by neither parent stay at zero; avoid 0 / 0.
        coverage[coverage == 0] = 1.
        center /= coverage

        merged_dim = sorted(frozenset(first.dim) | frozenset(second.dim))
        merged_ids = first.base_ids | second.base_ids
        feature = KernelizedFeature(center=center,
                                    dim=merged_dim,
                                    kernel_args=self.kernel_args,
                                    kernel=self.kernel,
                                    index=new_index,
                                    base_ids=merged_ids)
        self.features.append(feature)
        # Priority is the negative number of base ids
        self.sorted_ids.push(-len(feature.base_ids), new_index)
        self.base_id_sets.add(feature.base_ids)
        del self.candidates[(index1, index2)]

        # Select partners first, against the current base_id_sets, and only
        # then register the new combinations, so that registering one pair
        # cannot influence the selection of another.
        partners = [
            i for i in range(new_index)
            if (self.features[i].base_ids | merged_ids)
            not in self.base_id_sets
            and frozenset(self.features[i].dim).isdisjoint(merged_dim)
        ]
        for i in partners:
            self.base_id_sets.add(merged_ids | self.features[i].base_ids)
        self.candidates.update(
            {(i, new_index): Candidate(i, new_index) for i in partners})

        self.logger.debug("Added refined feature {} {}".format(
            new_index, feature))
        self.logger.debug("{} candidates".format(len(self.candidates)))
        self.features_num += 1

        # Extend the weight vector; Q is only passed along when normalizing.
        extra_args = (Q,) if self.normalization else ()
        self.weight_vec = addNewElementForAllActions(
            self.weight_vec, self.domain.actions_num, *extra_args)

        return new_index
Example #3
0
 def addNewWeight(self):
     """
     Extend the weight vector with one additional entry per action, to go
     with a newly added feature.

     No initial value is handed to ``addNewElementForAllActions``; the new
     entries are presumably zero-filled by that helper (confirm against its
     implementation).
     """
     current_weights = self.weight_vec
     action_count = self.actions_num
     self.weight_vec = addNewElementForAllActions(current_weights,
                                                  action_count)
Example #4
0
    def learn(self, s, p_actions, a, r, ns, np_actions, na, terminal):
        """
        Perform one temporal-difference update of the representation's
        weight vector from a single observed transition.

        :param s: state the action was taken in.
        :param p_actions: not used by this method.
        :param a: action taken in ``s``.
        :param r: reward received for the transition.
        :param ns: resulting next state.
        :param np_actions: forwarded to ``self._future_action`` together with
            the next state.
        :param na: next action; replaced by whatever
            ``self._future_action`` returns.
        :param terminal: whether ``ns`` is terminal. When true,
            ``self.episodeTerminated()`` is called at the end.

        The weights are updated in place. If the update yields a non-finite
        weight, the weights are rolled back to their values before the update
        and a warning is printed.
        """
        # The previous state could never be terminal
        # (otherwise the episode would have already terminated)
        prevStateTerminal = False

        self.representation.pre_discover(s, prevStateTerminal, a, ns, terminal)
        discount_factor = self.discount_factor
        # Same array object the representation holds: in-place changes below
        # are visible to the representation.
        weight_vec = self.representation.weight_vec
        phi_s = self.representation.phi(s, prevStateTerminal)
        phi = self.representation.phi_sa(s, prevStateTerminal, a, phi_s)
        phi_prime_s = self.representation.phi(ns, terminal)
        na = self._future_action(
            ns, terminal, np_actions, phi_prime_s,
            na)  # here comes the difference between SARSA and Q-Learning
        phi_prime = self.representation.phi_sa(ns, terminal, na, phi_prime_s)
        nnz = count_nonzero(phi_s)  # Number of non-zero elements

        # Set eligibility traces:
        if self.lambda_:
            # Number of features (per action) the trace is missing compared
            # to the current feature vector.
            expanded = old_div(len(phi) - len(self.eligibility_trace),
                               self.representation.actions_num)
            if expanded > 0:
                # Correct the size of eligibility traces (pad with zeros for
                # new features)
                self.eligibility_trace = addNewElementForAllActions(
                    self.eligibility_trace, self.representation.actions_num,
                    np.zeros((self.representation.actions_num, expanded)))

            self.eligibility_trace *= discount_factor * self.lambda_
            self.eligibility_trace += phi

            # Set max to 1
            self.eligibility_trace[self.eligibility_trace > 1] = 1
        else:
            self.eligibility_trace = phi

        td_error = r + np.dot(discount_factor * phi_prime - phi, weight_vec)
        if nnz > 0:
            self.updateLearnRate(phi, phi_prime, self.eligibility_trace,
                                 discount_factor, nnz, terminal)
            weight_vec_old = weight_vec.copy()
            weight_vec += (self.learn_rate
                           * self.representation.featureLearningRate()
                           * td_error * self.eligibility_trace)
            if not np.all(np.isfinite(weight_vec)):
                # Roll back IN PLACE. Rebinding the local name would leave
                # the representation holding the diverged weights, because
                # the += above already modified the shared array.
                weight_vec[:] = weight_vec_old
                print(
                    "WARNING: TD-Learning diverged, weight_vec reached infinity!"
                )
        # Discover features if the representation has the discover method
        self.representation.post_discover(s, prevStateTerminal, a,
                                          td_error, phi_s)

        if terminal:
            # If THIS state is terminal:
            self.episodeTerminated()
Example #5
0
 def updateWeight(self, p1_index, p2_index):
     """
     Extend the weight vector with one entry per action for the feature
     that was just added.

     When ``self.sparsify`` is truthy, the new entry for each action is
     initialised to the sum of that action's weights for the two parent
     features ``p1_index`` and ``p2_index``. Otherwise ``None`` is passed
     as the initial value to ``addNewElementForAllActions``.
     """
     num_actions = self.domain.actions_num
     # features_num already counts the new feature, while weight_vec still
     # has the old layout, so this is the per-action stride in weight_vec.
     old_feature_count = self.features_num - 1

     initial = None
     if self.sparsify:
         # A stride of old_feature_count picks one parent's weight from
         # each action's slice of the flat weight vector.
         parent_sum = (self.weight_vec[p1_index::old_feature_count] +
                       self.weight_vec[p2_index::old_feature_count])
         initial = parent_sum.reshape((-1, 1))

     self.weight_vec = addNewElementForAllActions(self.weight_vec,
                                                  num_actions, initial)
     # The cached hashed phi must not be reused: the phi function has
     # changed with the new feature.
     self.hashed_s = None