Example no. 1
 def update_weights_continuous(self, s, a, r, ns, eta, gamma=1):
     """
     Update weights given state, action, reward and next state  using delta rule update
         s, a, r, ns: state, action, reward, next state
         eta: learning rate
         gamma: discount factor
     """
     # (neuron_index // pop_size) // num_actions = state_index
     # (neuron_index // pop_size) % num_actions = action_index
     # state_index // 17 = position_index
     # state_index % 17 = velocity_index
     p, v = s
     pp, vv = ns
     # Weighted input at the current state (p, v) for the chosen action a:
     # neurons coding for other actions contribute zero.
     Swphi = np.dot((self.W + self.competition).T[:, :-1],
                    np.array([0 if (a != i // self.pop_size % 3) else
                              cf.phi((p - i // self.pop_size // (3 * self.num_v) + 8) % 16 - 8) *
                              cf.phi(v - (i // self.pop_size // 3) % self.num_v)
                              for i in range(self.K - 1)]))
     # Target: discounted tuning-curve responses at the next state (pp, vv),
     # with the reward r appended as the last entry.
     Intphia = [cf.phi((pp - i // self.pop_size // (3 * self.num_v) + 8) % 16 - 8) *
                cf.phi(vv - (i // self.pop_size // 3) % self.num_v) * gamma
                for i in range(self.K - 1)]
     Intphia += [r]
     # Delta-rule error (target minus prediction), scaled by the learning rate eta.
     factor = eta * (np.array(Intphia) - Swphi)
     for i in range(self.num_states):
         # Skip the update for neurons 'far away' from the current state.
         if abs(i // self.num_v - s[0]) > 2 or abs(i % self.num_v - s[1]) > 2:
             continue
         for k in range(self.pop_size):
             self.W[self.sa[i, a, k]] += factor * \
                 cf.phi((p - i // self.num_v + 8) % 16 - 8) * \
                 cf.phi(v - i % self.num_v)
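Both examples call cf.phi, which is not part of the snippet. A minimal sketch of what it could look like, assuming phi is a Gaussian tuning curve over the distance between the current state and a cell's preferred value (the width sigma is a hypothetical parameter, not taken from the original code):

 import numpy as np

 def phi(d, sigma=1.0):
     """Gaussian tuning curve: response of a cell whose preferred value lies at
     distance d from the current state (sigma is a hypothetical width)."""
     return np.exp(-0.5 * (d / sigma) ** 2)

A narrow tuning curve of this kind would also be consistent with the update loop above skipping neurons more than two bins away from the current state, since their responses are negligible.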
Example no. 2
 def get_Q(self, x, p, v):
     # Q-value of each action at state (p, v): mean rectified weight per
     # (state, action) population, dotted with the position/velocity tuning responses.
     act = np.array([cf.phi((p - i // self.num_v + 8) % 16 - 8) *
                     cf.phi(v - i % self.num_v) for i in range(self.num_states)])
     return np.dot(np.mean((x * (x > 0))[self.sa], axis=2).T, act)
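The recurring term (p - centre + 8) % 16 - 8 in both examples wraps the position difference onto the interval [-8, 8) before it is passed to cf.phi, so differences that are 16 apart are treated as identical. A small stand-alone check of this wrapping, with made-up values:

 def wrapped_distance(p, centre):
     # Map the raw difference onto [-8, 8) so that positions 0 and 16 coincide.
     return (p - centre + 8) % 16 - 8

 print(wrapped_distance(15.5, 0.5))   # -1.0: the short way around the wrap point
 print(wrapped_distance(0.5, 15.5))   #  1.0
 print(wrapped_distance(4.0, 2.0))    #  2.0: plain difference away from the wrap point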