コード例 #1
0
    def parallel_sampling_keepU(self, step, eta, run, rate, T=500, r0=.5, tm=20, ts=2,
                                reset=0, gamma=1, trials=1000, mode='fix',  maxsteps=300,
                                initpv=None, initW=None, samples=100):
        np.random.seed(run)
        if initW is None:
            self.unlearn(r0, mode)
        else:
            self.W = initW
        if isinstance(T, (int, long, float, complex)):
            Tmax = T
        else:
            Tmax = T[1]
        seq = [[None]] * trials  # sequences might have differnt length -> list instead array
        scount = np.zeros((trials, self.K))
        uinit = np.zeros(self.K)
        uinit[self.r] = rate / 1000.
        for t in xrange(trials):
            print 'run', run, '  trial', t, '/', trials
            stdout.flush()
            pvls = [[8, 16]] if initpv is None else [initpv]
            a = []
            r = []
            res = cfn.runpopU_js(self.W / self.pop_size, uinit, step, self.pop_size, rate,
                                 Tmax, tm, ts, 1. * reset / self.pop_size, run)
            uinit = res[1]
            scount[t] = np.sum(res[0], axis=0)
            for counter in xrange(maxsteps):
                a += [self.get_a(scount[t], *pvls[-1])]
                pvls += [cf.get_next_pv(pvls[-1][0], pvls[-1][1], a[-1])]
                r += [cf.get_R(*pvls[-1])]
            seq[t] = [pvls[:-1], a, r]
            for i in range(samples):
                p = 16 * np.random.rand()
                v = 32 * np.random.rand()
                for a in range(3):
                    pvs = cf.get_next_pvs(p, v, a, self.steps)
                    rr = cf.get_R(*pvs)
                    self.update_weights_continuous([p, v], a, rr, pvs,
                                                   eta, gamma**self.steps)

        return np.array([scount, np.array(seq), np.copy(self.W)])
コード例 #2
0
    def __init__(self, pop_size=1, steps=3, set_W=True):
        # reward_cpt(s,a,r) = p(r|s,a)
        self.num_p = num_p = 16
        self.num_v = num_v = 33
        num_s = num_p * num_v
        reward_cpt = np.ones((num_s, 3))
        self.steps = steps
        try:
            tmp = np.load('cpt.npz')
            transition_cpt = tmp['transition_cpt']
            reward_sizes = tmp['reward_sizes']
            print 'CPTs loaded'
            stdout.flush()
        except:
            print 'Creating CPTs'
            stdout.flush()
            # transition_cpt(s,a,s') = p(s'|a,s)
            # 0 = reverse (-1), 1 = no throttle (0), 2 = forward (1)
            transition_cpt = np.ones((num_s, 3, num_s), dtype=float) / 3.
            reward_sizes = np.zeros((num_s, 3))

            def pos(s):
                return self.pos(s)

            def vel(s):
                return self.vel(s)
            overlap = np.array([quad(lambda p: self.phi(p) * self.phi(j - p), -
                                     np.inf, np.inf, epsabs=1e-20) for j in range(max(num_p, num_v))])
            overlap[overlap[:, 0] < overlap[:, 1], 0] = 0
            overlap = overlap[:, 0]
            Kp = np.array([[overlap[min(abs(i - j), 16 - abs(i - j))]
                            for i in range(num_p)] for j in range(num_p)])
            Kv = np.array([[overlap[abs(i - j)]
                            for i in range(num_v)] for j in range(num_v)])
            self.invKp = invKp = np.linalg.inv(Kp)
            self.invKv = invKv = np.linalg.inv(Kv)

            def phi(i):
                sigma = .5
                return 0 if abs(i) > 5 else np.exp(-i**2 / (2 * sigma**2)) / (2 * np.pi)**(1. / 2) / sigma
            for i in xrange(num_s):
                for a in xrange(3):
                    integral = [cf.trapz2d(i, n, a, steps=steps) for n in xrange(num_s)]
                    for k in xrange(num_s):
                        transition_cpt[i, a, k] = np.sum([invKp[pos(k), pos(n)] * invKv[vel(k), vel(n)] *
                                                          integral[n] for n in xrange(num_s)])

                reward_sizes[i] = dblquad(lambda v, p: phi(pos(i) - p) * phi(vel(i) - v) * cf.get_R(p, v),
                                          pos(i) - 5, pos(i) + 5, lambda tmp: vel(i) - 5,
                                          lambda tmp: vel(i) + 5, epsabs=1e-6)[0]
            np.savez_compressed('cpt.npz', reward_sizes=reward_sizes,
                                transition_cpt=transition_cpt)
        DPNetPop.__init__(self, transition_cpt, reward_cpt, pop_size, reward_sizes, set_W)