def parallel_sampling_keepU(self, step, eta, run, rate, T=500, r0=.5, tm=20,
                            ts=2, reset=0, gamma=1, steps=100, start_state=0,
                            mode='fix', epsilon=0, max_iter=np.inf):
    """Run `steps` trials of simulate -> read out policy -> update weights.

    The network state returned by the simulator in one trial (``res[1]``) is
    fed back in as the initial state of the next trial, so the state is kept
    across trials instead of being re-initialised.

    Parameters
    ----------
    step, rate, tm, ts : forwarded unchanged to ``cfn.runpopU_js``.
    eta, gamma : forwarded to ``self.update_weights_nonepisodic4DKL``;
        ``gamma`` is also forwarded to ``self.R4Pi``, ``self.DKL4weights``
        and ``self.MSE4weights``.
    run : seeds NumPy's global RNG and is passed as the last argument of
        ``cfn.runpopU_js``.
    T : scalar or indexable. A scalar is used directly as the duration
        argument of the simulator; otherwise ``T[1]`` is used. ``T`` itself
        is always handed to ``self.get_policy``.
    r0, mode : forwarded to ``self.unlearn``.
    reset : divided by ``self.pop_size`` before being passed to the simulator.
    steps : number of trials.
    start_state, epsilon, max_iter : forwarded to ``self.R4Pi``.

    Returns
    -------
    numpy.ndarray
        1-D object array ``[pi, r, DKL, RMSE, W]`` where ``pi`` is
        ``(steps, num_states)`` int, ``r`` is ``(steps,)``, ``DKL`` and
        ``RMSE`` are ``(steps, 2)`` and ``W`` is a copy of ``self.W`` after
        the last trial.
    """
    # maybe put this outside function to continue learning instead of fresh start
    np.random.seed(run)
    self.unlearn(r0, mode)

    # One minus the column sums of W with the last pop_size rows/columns cut off.
    pTerminal = 1 - np.sum(self.W[:-self.pop_size, :-self.pop_size], axis=0)

    pi = np.zeros((steps, self.num_states), dtype=int)
    r = np.zeros(steps, dtype=float)
    DKL = np.zeros((steps, 2), dtype=float)
    RMSE = np.zeros((steps, 2), dtype=float)

    # np.isscalar covers Python and NumPy numeric scalars and, unlike the
    # former isinstance(..., long, ...) check, also works on Python 3.
    Tmax = T if np.isscalar(T) else T[1]

    uinit = np.zeros(self.K)
    uinit[self.r] = rate / 1000.

    for t in range(steps):
        # Same text the old print statement produced: "run R  trial t / steps".
        print('run %s  trial %s / %s' % (run, t, steps))
        stdout.flush()

        res = cfn.runpopU_js(self.W / self.pop_size, uinit, step,
                             self.pop_size, rate, Tmax, tm, ts,
                             1. * reset / self.pop_size, run)
        uinit = res[1]  # carry the network state over to the next trial
        pi[t] = self.get_policy(step, res[0], T)

        # Sweep every (state, action) pair once; pTerminal is threaded
        # through the successive updates, so the sweep order matters.
        for s in range(self.num_states):
            for a in range(self.num_actions):
                ns, rr = self.next_SR(s, a)
                pTerminal = self.update_weights_nonepisodic4DKL(
                    pTerminal, s, a, rr, ns, eta, gamma)

        r[t] = self.R4Pi(pi[t], start_state, gamma, epsilon, max_iter)
        dd = self.DKL4weights(self.W, pTerminal, gamma)
        DKL[t] = np.array([np.mean(dd[0]), np.mean(dd[1])])
        RMSE[t] = np.sqrt(self.MSE4weights(self.W, pTerminal, gamma))

    # The five results have different shapes. Fill an object array slot by
    # slot: np.array([...]) on ragged input raises on recent NumPy releases.
    results = (pi, r, DKL, RMSE, np.copy(self.W))
    packed = np.empty(len(results), dtype=object)
    for idx, item in enumerate(results):
        packed[idx] = item
    return packed
def parallel_sampling_keepU(self, step, eta, run, rate, T=500, r0=.5, tm=20,
                            ts=2, reset=0, gamma=1, trials=1000, mode='fix',
                            maxsteps=300, initpv=None, initW=None, samples=100):
    """Run `trials` trials of simulate -> roll out actions -> update weights.

    The network state returned by the simulator in one trial (``res[1]``) is
    fed back in as the initial state of the next trial.

    Parameters
    ----------
    step, rate, tm, ts : forwarded unchanged to ``cfn.runpopU_js``.
    eta : forwarded to ``self.update_weights_continuous``.
    run : seeds NumPy's global RNG and is passed as the last argument of
        ``cfn.runpopU_js``.
    T : scalar or indexable. A scalar is used directly as the duration
        argument of the simulator; otherwise ``T[1]`` is used.
    r0, mode : forwarded to ``self.unlearn`` (only when ``initW`` is None).
    reset : divided by ``self.pop_size`` before being passed to the simulator.
    gamma : raised to the power ``self.steps`` before being passed to
        ``self.update_weights_continuous``.
    trials : number of trials.
    maxsteps : number of actions taken in each trial's roll-out.
    initpv : first entry of every roll-out; ``[8, 16]`` when None.
    initW : if given, assigned to ``self.W`` (no copy is made) instead of
        calling ``self.unlearn``.
    samples : number of random ``(p, v)`` points used per trial for the
        weight updates.

    Returns
    -------
    numpy.ndarray
        1-D object array ``[scount, seq, W]``: ``scount`` is
        ``(trials, K)``, ``seq`` is an object array built from the per-trial
        ``[visited points, actions, rewards]`` lists, and ``W`` is a copy of
        ``self.W`` after the last trial.
    """
    np.random.seed(run)
    if initW is None:
        self.unlearn(r0, mode)
    else:
        self.W = initW

    # np.isscalar covers Python and NumPy numeric scalars and, unlike the
    # former isinstance(..., long, ...) check, also works on Python 3.
    Tmax = T if np.isscalar(T) else T[1]

    # sequences might have different length -> list instead of array
    seq = []
    scount = np.zeros((trials, self.K))
    uinit = np.zeros(self.K)
    uinit[self.r] = rate / 1000.

    for t in range(trials):
        # Same text the old print statement produced: "run R  trial t / trials".
        print('run %s  trial %s / %s' % (run, t, trials))
        stdout.flush()

        pvls = [[8, 16]] if initpv is None else [initpv]
        actions = []
        rewards = []

        res = cfn.runpopU_js(self.W / self.pop_size, uinit, step,
                             self.pop_size, rate, Tmax, tm, ts,
                             1. * reset / self.pop_size, run)
        uinit = res[1]  # carry the network state over to the next trial
        scount[t] = np.sum(res[0], axis=0)

        # Roll out maxsteps actions, each chosen from this trial's summed
        # simulator output and the most recent point.
        for _ in range(maxsteps):
            actions.append(self.get_a(scount[t], *pvls[-1]))
            pvls.append(
                cf.get_next_pv(pvls[-1][0], pvls[-1][1], actions[-1]))
            rewards.append(cf.get_R(*pvls[-1]))
        # The final point has no action/reward paired with it, so drop it.
        seq.append([pvls[:-1], actions, rewards])

        # Weight updates at `samples` random points: p uniform in [0, 16),
        # v uniform in [0, 32), each tried with actions 0, 1 and 2.
        for _ in range(samples):
            p = 16 * np.random.rand()
            v = 32 * np.random.rand()
            for action in range(3):
                pvs = cf.get_next_pvs(p, v, action, self.steps)
                rr = cf.get_R(*pvs)
                self.update_weights_continuous(
                    [p, v], action, rr, pvs, eta, gamma ** self.steps)

    # The three results have different shapes. Build object arrays
    # explicitly: np.array([...]) on ragged input raises on recent NumPy.
    results = (scount, np.array(seq, dtype=object), np.copy(self.W))
    packed = np.empty(len(results), dtype=object)
    for idx, item in enumerate(results):
        packed[idx] = item
    return packed
# (Tail of a plotting call that starts above this excerpt.)
# yerr is the std of perf over axis 0 divided by sqrt(len(perf)),
# i.e. the standard error of the mean across the first axis of perf.
yerr=np.std(perf, axis=0) / np.sqrt(len(perf)))
# Fixed ticks/limits: x at 0, 50, 100; y at 0, .5, 1 and clipped to [0, 1].
pl.xticks([0, 50, 100], [0, 50, 100])
pl.yticks([0, .5, 1.0], [0, .5, 1.0])
pl.ylim([0, 1])
pl.xlabel('Trials')
pl.ylabel('Performance')
simpleaxis(pl.gca())
pl.tight_layout(0)
pl.savefig('learn_performance.pdf', dpi=600)

# initialize our network at two different points in the state space of its neural
# activities that correspond to representing the same (approximate) value function
uu0 = .2
# 30 simulator runs started from the uniform state uu0 * ones(K); only the
# first element of each result is kept. The comprehension variable `run` is
# passed as the simulator's last argument (presumably a seed -- TODO confirm).
res0 = [cfn.runpopU_js(W, uu0 * np.ones(net.K), step, 1, rate, 1000, 20, 2, ref, run)[0]
        for run in range(30)]
# Same 30 runs, but started from a state made by repeating the triple
# [-12, .75 * uu0, 2.25 * uu0] net.K / 3 times and appending a single 1.
# NOTE(review): np.ones(net.K / 3) needs an integer count, so this relies on
# Python 2 integer division -- confirm before running under Python 3.
res1 = [cfn.runpopU_js(W, np.hstack([np.ravel(
    np.outer(np.ones(net.K / 3), [-12, uu0 * .75, 2.25 * uu0])), np.array([1])]),
    step, 1, rate, 1000, 20, 2, ref, run)[0] for run in range(30)]

pl.figure(figsize=(6, 6))
# Only the first four of the five listed (p, v) pairs are plotted.
for i in range(4):
    p, v = [(0, 17), (8, 17), (5, 22), (2, 18), (10, 27)][i]
    # Solid line: res0 averaged over the 30 runs, mapped through net.get_Q,
    # summed over axis 1, smoothed and divided by rate.
    pl.plot(smooth_spikes(np.sum([
        net.get_Q(s, p, v) for s in np.mean(res0, 0)], 1), 40., step, 3 * uu0) / rate,
        c=col[i], zorder=10)
    # Dashed line, same colour: the identical quantity for res1.
    pl.plot(smooth_spikes(np.sum([
        net.get_Q(s, p, v) for s in np.mean(res1, 0)], 1), 40., step, 3 * uu0) / rate,
        '--', c=col[i], zorder=10)
# (Tail of a plotting call that starts above this excerpt.)
# yerr is the std of perf over axis 0 divided by sqrt(len(perf)),
# i.e. the standard error of the mean across the first axis of perf.
yerr=np.std(perf, axis=0) / np.sqrt(len(perf)))
# Fixed ticks/limits: x at 0, 50, 100; y at 0, .5, 1 and clipped to [0, 1].
pl.xticks([0, 50, 100], [0, 50, 100])
pl.yticks([0, .5, 1.0], [0, .5, 1.0])
pl.ylim([0, 1])
pl.xlabel('Trials')
pl.ylabel('Performance')
simpleaxis(pl.gca())
pl.tight_layout(0)
pl.savefig('learn_performance.pdf', dpi=600)

# initialize our network at two different points in the state space of its neural
# activities that correspond to representing the same (approximate) value function
uu0 = .2
# 30 simulator runs started from the uniform state uu0 * ones(K); only the
# first element of each result is kept. The comprehension variable `run` is
# passed as the simulator's last argument (presumably a seed -- TODO confirm).
res0 = [
    cfn.runpopU_js(W, uu0 * np.ones(net.K), step, 1, rate, 1000, 20, 2, ref, run)[0]
    for run in range(30)
]
# Same 30 runs, but started from a state made by repeating the triple
# [-12, .75 * uu0, 2.25 * uu0] net.K / 3 times and appending a single 1.
# NOTE(review): np.ones(net.K / 3) needs an integer count, so this relies on
# Python 2 integer division -- confirm before running under Python 3.
res1 = [
    cfn.runpopU_js(
        W,
        np.hstack([
            np.ravel(np.outer(np.ones(net.K / 3), [-12, uu0 * .75, 2.25 * uu0])),
            np.array([1])
        ]), step, 1, rate, 1000, 20, 2, ref, run)[0] for run in range(30)
]

pl.figure(figsize=(6, 6))
# Only the first four of the five listed (p, v) pairs are used.
for i in range(4):
    p, v = [(0, 17), (8, 17), (5, 22), (2, 18), (10, 27)][i]
    # (This call continues past the end of the excerpt.)
    pl.plot(smooth_spikes(