Example #1
0
 def __init__(self, pop_size=10):
     """Set up a one-state, two-action task and initialise the base class.

     Args:
         pop_size: forwarded unchanged to ``DPNetPop.__init__``.
     """
     # Transition table indexed [s, a, s'] = p(s'|a,s); actions are
     # 0 = Left, 1 = Right.  Every entry stays zero.
     transitions = np.zeros(shape=(1, 2, 1), dtype=float)
     # Reward table indexed [s, a] (outcome 0 = False, 1 = True): the Left
     # entry is 0 and the Right entry is 0.1.
     rewards = np.array([[0, .1]])
     DPNetPop.__init__(self, transitions, rewards, pop_size)
 def __init__(self, pop_size=10):
     """Create the CPTs of a single-state task with two actions.

     Args:
         pop_size: passed straight through to ``DPNetPop.__init__``.
     """
     n_states, n_actions = 1, 2
     # [s, a, s'] = p(s'|a,s), with action 0 = Left and 1 = Right; the table
     # is left all-zero.
     transition_table = np.zeros((n_states, n_actions, n_states), dtype=float)
     # [s, a] reward entries (0 = False, 1 = True): 0 for Left, 0.1 for Right.
     reward_table = np.array([[0, .1]])
     DPNetPop.__init__(self, transition_table, reward_table, pop_size)
Example #3
0
 def __init__(self, pop_size=1, set_W=True):
     """Set up a three-state, two-action task and initialise the base class.

     Args:
         pop_size: forwarded to ``DPNetPop.__init__``.
         set_W: forwarded to ``DPNetPop.__init__`` as keyword ``set_W``.
     """
     n_states, n_actions = 3, 2
     # [s, a, s'] = p(s'|a,s); actions are 0 = Left, 1 = Right.
     transitions = np.zeros((n_states, n_actions, n_states), dtype=float)
     # Only state 0 has successors: Left leads to state 1, Right to state 2.
     for action, successor in enumerate([1, 2]):
         transitions[0, action, successor] = 1
     # [s, a] reward entries (0 = False, 1 = True), one row per state.
     rewards = np.array([[0, 0], [1, 0], [.5, .75]])
     DPNetPop.__init__(self, transitions, rewards, pop_size, set_W=set_W)
Example #4
0
    def __init__(self, pop_size=1, steps=3, set_W=True):
        """Load the CPTs from ``cpt.npz`` or build and cache them, then init.

        The state space has ``num_p * num_v`` (16 * 33) states and three
        actions (0 = reverse, 1 = no throttle, 2 = forward).  If ``cpt.npz``
        can be read, ``transition_cpt`` and ``reward_sizes`` are taken from
        it; otherwise both tables are computed by numerical integration and
        written to ``cpt.npz`` for the next run.

        NOTE(review): ``self.invKp`` and ``self.invKv`` are only assigned when
        the tables are rebuilt, not when they are loaded from the cache file.
        Confirm that no other code relies on them after a cached start.

        Args:
            pop_size: forwarded to ``DPNetPop.__init__``.
            steps: stored as ``self.steps`` and passed to ``cf.trapz2d`` while
                the transition table is built.
            set_W: forwarded to ``DPNetPop.__init__``.
        """
        # reward_cpt(s,a,r) = p(r|s,a)
        self.num_p = num_p = 16
        self.num_v = num_v = 33
        num_s = num_p * num_v
        reward_cpt = np.ones((num_s, 3))
        self.steps = steps
        try:
            tmp = np.load('cpt.npz')
            transition_cpt = tmp['transition_cpt']
            reward_sizes = tmp['reward_sizes']
            print('CPTs loaded')
            stdout.flush()
        except Exception:
            # Any ordinary failure (missing/corrupt file, missing key) falls
            # back to rebuilding.  KeyboardInterrupt and SystemExit are no
            # longer swallowed, so Ctrl-C does not trigger a long rebuild.
            print('Creating CPTs')
            stdout.flush()
            # transition_cpt(s,a,s') = p(s'|a,s)
            # 0 = reverse (-1), 1 = no throttle (0), 2 = forward (1)
            transition_cpt = np.ones((num_s, 3, num_s), dtype=float) / 3.
            reward_sizes = np.zeros((num_s, 3))

            def pos(s):
                return self.pos(s)

            def vel(s):
                return self.vel(s)

            # quad returns (value, abs. error estimate) for the overlap of two
            # basis functions whose centres are j apart.
            overlap = np.array([
                quad(lambda p: self.phi(p) * self.phi(j - p),
                     -np.inf, np.inf, epsabs=1e-20)
                for j in range(max(num_p, num_v))])
            # Treat overlaps smaller than their own error estimate as zero.
            overlap[overlap[:, 0] < overlap[:, 1], 0] = 0
            overlap = overlap[:, 0]
            # Position indices use the wrap-around distance over num_p cells;
            # velocity indices use the plain distance.
            Kp = np.array([[overlap[min(abs(i - j), num_p - abs(i - j))]
                            for i in range(num_p)] for j in range(num_p)])
            Kv = np.array([[overlap[abs(i - j)]
                            for i in range(num_v)] for j in range(num_v)])
            self.invKp = invKp = np.linalg.inv(Kp)
            self.invKv = invKv = np.linalg.inv(Kv)

            def phi(i):
                # Gaussian density with sigma = 0.5, cut to 0 for |i| > 5.
                sigma = .5
                return 0 if abs(i) > 5 else np.exp(-i**2 / (2 * sigma**2)) / (2 * np.pi)**(1. / 2) / sigma

            for i in range(num_s):
                for a in range(3):
                    integral = [cf.trapz2d(i, n, a, steps=steps) for n in range(num_s)]
                    for k in range(num_s):
                        transition_cpt[i, a, k] = np.sum([invKp[pos(k), pos(n)] * invKv[vel(k), vel(n)] *
                                                          integral[n] for n in range(num_s)])

                # The same value is written for all three actions of state i.
                # Outer integration variable is p, inner is v, each over
                # +/- 5 around the state's own position / velocity.
                reward_sizes[i] = dblquad(lambda v, p: phi(pos(i) - p) * phi(vel(i) - v) * cf.get_R(p, v),
                                          pos(i) - 5, pos(i) + 5, lambda tmp: vel(i) - 5,
                                          lambda tmp: vel(i) + 5, epsabs=1e-6)[0]
            np.savez_compressed('cpt.npz', reward_sizes=reward_sizes,
                                transition_cpt=transition_cpt)
        DPNetPop.__init__(self, transition_cpt, reward_cpt, pop_size, reward_sizes, set_W)
 def __init__(self, n_actions=2, pop_size=1, set_W=True):
     """Set up a ten-state chain task and initialise the base class.

     Args:
         n_actions: size of the action axis of both tables.
         pop_size: forwarded to ``DPNetPop.__init__``.
         set_W: forwarded to ``DPNetPop.__init__`` as keyword ``set_W``.
     """
     n_states = 10
     # [s, a, s'] = p(s'|a,s)
     transitions = np.zeros((n_states, n_actions, n_states), dtype=float)
     # Every action leads to state 0 ...
     transitions[:, :, 0] = 1
     # ... except action 1, which instead steps from s to s + 1 for s < 9
     # (state 9 has no successor under action 1).
     transitions[:, 1, 0] = 0
     chain = np.arange(n_states - 1)
     transitions[chain, 1, chain + 1] = 1
     # [s, a] reward entries (0 = False, 1 = True): only action 1 in state 9
     # is set to 1.
     rewards = np.zeros((n_states, n_actions))
     rewards[9, 1] = 1
     DPNetPop.__init__(self, transitions, rewards, pop_size, set_W=set_W)
    def __init__(self, pop_size=10, set_W=True):
        """Build the CPTs of a 6x7 grid task with three flags, then init.

        States are numbered by ``self.get_state_nr(position, flags)``; a
        negative number marks a wall cell.  An action moves in the chosen
        direction with probability 0.9 and in each neighbouring direction
        (action index -1 / +1 modulo 4) with probability 0.05.  A move that
        leaves the grid or hits a wall keeps the agent in place.

        Args:
            pop_size: forwarded to ``DPNetPop.__init__``.
            set_W: forwarded to ``DPNetPop.__init__``.
        """
        n_states, n_actions = 264, 4
        # [s, a, s'] = p(s'|a,s); 0 = North, 1 = East, 2 = South, 3 = West
        transition_cpt = np.zeros((n_states, n_actions, n_states), dtype=float)
        slip = [.05, .9, .05]
        # Entering the k-th cell of this list sets flag k to 1.
        flag_cells = [[0, 2], [5, 0], [4, 6]]
        flag_combos = [[f0, f1, f2]
                       for f0 in [0, 1] for f1 in [0, 1] for f2 in [0, 1]]
        for y in range(6):
            for x in range(7):
                for a in range(n_actions):
                    if self.get_state_nr([y, x], [0, 0, 0]) < 0:
                        continue  # wall

                    for aa in range(3):
                        b = np.mod(a + aa - 1, 4)
                        npos = self.get_next_pos([y, x], b)

                        blocked = (np.min(npos) < 0
                                   or np.max(npos - [5, 6]) > 0
                                   or self.get_state_nr(npos, [0, 0, 0]) < 0)
                        if blocked:
                            # walk against wall: stay put in every flag state
                            for s in self.get_state_nr([y, x]):
                                transition_cpt[s, a, s] += slip[aa]
                            continue

                        npos = list(npos)
                        for flags in flag_combos:
                            next_flags = list(flags)
                            for idx, cell in enumerate(flag_cells):
                                if npos == cell:
                                    next_flags[idx] = 1
                            src = self.get_state_nr([y, x], list(flags))
                            dst = self.get_state_nr(npos, next_flags)
                            transition_cpt[src, a, dst] += slip[aa]

        # reward_cpt(s,a,r) = p(r|s,a); 0 = False, 1 = True
        reward_cpt = np.ones((n_states, n_actions))
        reward_sizes = np.zeros((n_states, n_actions))
        # In cell [0, 6] the reward size is the number of set flags and all
        # outgoing transitions are removed.
        for flags in flag_combos:
            goal = self.get_state_nr([0, 6], list(flags))
            reward_sizes[goal] = sum(flags)
            transition_cpt[goal] *= 0
        DPNetPop.__init__(self, transition_cpt, reward_cpt, pop_size, reward_sizes, set_W)
 def __init__(self, n_actions=2, pop_size=1, set_W=True):
     """Create the CPTs of a ten-state chain and initialise the base class.

     Args:
         n_actions: size of the action axis of both tables.
         pop_size: forwarded to ``DPNetPop.__init__``.
         set_W: forwarded to ``DPNetPop.__init__`` as keyword ``set_W``.
     """
     n_states = 10
     last = n_states - 1
     # [s, a, s'] = p(s'|a,s): all actions go to state 0, then action 1 is
     # rewired to advance one state along the chain (nothing from state 9).
     trans = np.zeros((n_states, n_actions, n_states), dtype=float)
     trans[:, :, 0] = 1
     trans[:, 1, 0] = 0
     for here, there in zip(range(last), range(1, n_states)):
         trans[here, 1, there] = 1
     # [s, a] reward entries (0 = False, 1 = True): set only for action 1 in
     # state 9.
     rew = np.zeros((n_states, n_actions))
     rew[9, 1] = 1
     DPNetPop.__init__(self, trans, rew, pop_size, set_W=set_W)
    def __init__(self, pop_size=1, set_W=True):
        """Set up a ten-state chain with two rewarded states, then init.

        Args:
            pop_size: forwarded to ``DPNetPop.__init__``.
            set_W: forwarded to ``DPNetPop.__init__`` as keyword ``set_W``.
        """
        n_states, n_actions = 10, 2
        # [s, a, s'] = p(s'|a,s)
        transitions = np.zeros((n_states, n_actions, n_states), dtype=float)
        # Action 1 advances s -> s + 1 for s < 9 (none from state 9).
        forward = np.arange(n_states - 1)
        transitions[forward, 1, forward + 1] = 1
        # Action 0 falls back to state 0 from states 0..5 and to state 6 from
        # states 6..9.
        transitions[:6, 0, 0] = 1
        transitions[6:, 0, 6] = 1

        # [s, a] reward entries (0 = False, 1 = True): action 1 in states 9
        # and 5.
        rewards = np.zeros((n_states, n_actions))
        rewards[[9, 5], 1] = 1
        DPNetPop.__init__(self, transitions, rewards, pop_size, set_W=set_W)
    def __init__(self, pop_size=1, set_W=True):
        """Create a ten-state chain with two reset points and two rewards.

        Args:
            pop_size: forwarded to ``DPNetPop.__init__``.
            set_W: forwarded to ``DPNetPop.__init__`` as keyword ``set_W``.
        """
        n_states = 10
        # [s, a, s'] = p(s'|a,s)
        trans = np.zeros((n_states, 2, n_states), dtype=float)
        for state in range(n_states):
            # Action 1: one step forward, except from the last state.
            if state < 9:
                trans[state, 1, state + 1] = 1
            # Action 0: back to state 0 below state 6, otherwise to state 6.
            reset_to = 0 if state < 6 else 6
            trans[state, 0, reset_to] = 1

        # [s, a] reward entries (0 = False, 1 = True)
        rew = np.zeros((n_states, 2))
        for rewarded in (9, 5):
            rew[rewarded, 1] = 1
        DPNetPop.__init__(self, trans, rew, pop_size, set_W=set_W)
Example #10
0
    def __init__(self, pop_size=10, set_W=True):
        """Build the dealer, start-state, transition and reward tables.

        Hands are described by a triple ``[hand, face, ace]``: the running
        total, a second index that is carried along unchanged (initialised
        equal to the dealer's first card), and a counter that is decremented
        whenever 10 is subtracted from a total above 21.  Card values run
        from 2 to 11; value 10 is drawn with probability 4/13, every other
        value with probability 1/13.

        Args:
            pop_size: forwarded to ``DPNetPop.__init__``.
            set_W: forwarded to ``DPNetPop.__init__``.
        """
        # self.pdealer[hand, face, ace]: probability mass of dealer hands.
        # Start with the dealer holding a single card (total == face).
        self.pdealer = np.zeros((27, 12, 2))
        for i in range(2, 10):
            self.pdealer[i, i, 0] = 1. / 13
        self.pdealer[10, 10, 0] = 4. / 13
        self.pdealer[11, 11, 1] = 1. / 13

        def draw(x):
            # Move the mass stored at x onto the hands reachable by drawing
            # one more card, then clear x.
            for card in range(2, 12):
                if card == 10:
                    if x[0] + card > 21 and x[2] > 0:
                        # Over 21 with ace counter set: subtract 10 and
                        # decrement the counter.
                        self.pdealer[x[0] + card - 10, x[1], x[2] -
                                     1] += self.pdealer[tuple(x)] * 4. / 13
                    else:
                        self.pdealer[x[0] + card, x[1], x[2]] += self.pdealer[tuple(x)] * 4. / 13
                elif card == 11:
                    if x[0] + card > 21:
                        # The new card is added as 1 instead of 11.
                        self.pdealer[x[0] + 1, x[1], x[2]] += self.pdealer[tuple(x)] * 1. / 13
                    else:
                        self.pdealer[x[0] + 11, x[1], x[2] + 1] += self.pdealer[tuple(x)] * 1. / 13
                else:
                    if x[0] + card > 21 and x[2] > 0:
                        self.pdealer[x[0] + card - 10, x[1], x[2] -
                                     1] += self.pdealer[tuple(x)] * 1. / 13
                    else:
                        self.pdealer[x[0] + card, x[1], x[2]] += self.pdealer[tuple(x)] * 1. / 13
            self.pdealer[tuple(x)] = 0
        # Keep drawing until no probability mass is left on totals below 17.
        while np.sum(self.pdealer[:17]) > 0:
            for hand in range(2, 17):
                for face in range(2, 12):
                    for ace in [0, 1]:
                        if self.pdealer[hand, face, ace] > 0:
                            draw([hand, face, ace])

        def next_x(x, card):
            # Hand triple after adding `card`: a drawn 11 increments the ace
            # counter; a total above 21 is reduced by 10 once if the counter
            # is positive; any remaining total above 21 is clamped to 22.
            xx = np.array(x) + [card, 0, card == 11]
            if xx[0] > 21 and xx[-1] > 0:
                xx[0] -= 10
                xx[-1] -= 1
            if xx[0] > 21:
                xx[0] = 22
            return xx

        # Local pxinit: joint probability of a one-card hand total and the
        # face index (ace counter is 1 only for a total of 11).
        pxinit = np.zeros((22, 12, 2))
        for hand in range(2, 12):
            ace = 1 if hand == 11 else 0
            for face in range(2, 12):
                if hand == 10 and face == 10:
                    pxinit[hand, face, ace] = 4. / 13 * 4. / 13
                elif hand == 10 or face == 10:
                    pxinit[hand, face, ace] = 4. / 13 / 13
                else:
                    pxinit[hand, face, ace] = 1. / 13 / 13
        # self.pxinit: the same distribution after one further card has been
        # added via next_x; self.pstart_state holds it per state number.
        self.pxinit = np.zeros((22, 12, 2))
        self.pstart_state = np.zeros(380)
        for hand in range(2, 12):
            for face in range(2, 12):
                ace = 1 if hand == 11 else 0
                for card in range(2, 12):
                    nx = tuple(next_x([hand, face, ace], card))
                    self.pxinit[nx] += pxinit[hand, face, ace] * \
                        (4. / 13 if card == 10 else 1. / 13)
                    # Re-written after every accumulation, so the final entry
                    # equals the final value of self.pxinit[nx].
                    self.pstart_state[self.get_state_nr(nx)] = self.pxinit[nx]

        # transition_cpt(s,a,s') = p(s'|a,s)
        # 0 = Hit, 1 = Stick
        # Only action 0 gets successors; action 1 rows stay zero.
        transition_cpt = np.zeros((380, 2, 380), dtype=float)
        for hand in range(4, 22):
            for face in range(2, 12):
                for ace in [0, 1]:
                    if ace == 1 and hand < 11:
                        continue
                    # NOTE(review): this is a plain assignment, not `+=`.
                    # next_x clamps every total above 21 to 22, so several
                    # cards can yield the same triple; if get_state_nr maps
                    # them to one state, only the last card's probability is
                    # kept.  Confirm whether accumulation was intended.
                    for card in range(2, 12):
                        transition_cpt[self.get_state_nr([hand, face, ace]), 0,
                                       self.get_state_nr(tuple(next_x([hand, face, ace], card)))]\
                            = 4. / 13 if card == 10 else 1. / 13

        # reward_cpt(s,a,r) = p(r|s,a)
        reward_sizes = np.zeros((380, 2, 3))  # reward can have 3 values (0,.5,1)
        reward_sizes[:, :, 1] = .5
        reward_sizes[:, :, 2] = 1
        reward_cpt = np.zeros((380, 2, 3))
        reward_cpt[:, :, 0] = 1  # default no reward
        # For action 1 the reward distribution comes from self.get_prew and
        # does not depend on the ace counter.
        for hand in range(4, 22):
            for face in range(2, 12):
                for ace in [0, 1]:
                    reward_cpt[self.get_state_nr([hand, face, ace]), 1] = self.get_prew(hand, face)

        DPNetPop.__init__(self, transition_cpt, reward_cpt, pop_size, reward_sizes, set_W)
Example #11
0
    def __init__(self, pop_size=10, set_W=True):
        """Create the CPTs of a 6x7 grid task with three flags, then init.

        ``self.get_state_nr(position, flags)`` numbers the states and
        returns a negative value for wall cells.  Each action is carried out
        as chosen with probability 0.9 and as one of its two neighbouring
        directions (index -1 / +1 modulo 4) with probability 0.05 each.
        Moves off the grid or into a wall leave the state unchanged.

        Args:
            pop_size: forwarded to ``DPNetPop.__init__``.
            set_W: forwarded to ``DPNetPop.__init__``.
        """
        # transition_cpt(s,a,s') = p(s'|a,s)
        # 0 = North, 1 = East, 2=South, 3=West
        transition_cpt = np.zeros((264, 4, 264), dtype=float)
        direction_prob = [.05, .9, .05]
        # (cell, index of the flag that is set to 1 on entering that cell)
        pickups = [([0, 2], 0), ([5, 0], 1), ([4, 6], 2)]
        all_flags = [[f0, f1, f2]
                     for f0 in [0, 1] for f1 in [0, 1] for f2 in [0, 1]]
        for y in range(6):
            for x in range(7):
                here = [y, x]
                for a in range(4):
                    if self.get_state_nr([y, x], [0, 0, 0]) < 0:
                        continue  # wall

                    for aa, prob in enumerate(direction_prob):
                        b = np.mod(a + aa - 1, 4)
                        npos = self.get_next_pos([y, x], b)

                        # walk against wall (or off the grid): stay in place
                        if np.min(npos) < 0 or np.max(npos - [5, 6]) > 0 \
                                or self.get_state_nr(npos, [0, 0, 0]) < 0:
                            for s in self.get_state_nr([y, x]):
                                transition_cpt[s, a, s] += prob
                            continue

                        npos = list(npos)
                        for flags in all_flags:
                            flags_after = list(flags)
                            for cell, which in pickups:
                                if npos == cell:
                                    flags_after[which] = 1
                            s_from = self.get_state_nr(list(here), list(flags))
                            s_to = self.get_state_nr(npos, flags_after)
                            transition_cpt[s_from, a, s_to] += prob

        # reward_cpt(s,a,r) = p(r|s,a)
        # 0 = False, 1 = True
        reward_cpt = np.ones((264, 4))
        reward_sizes = np.zeros((264, 4))
        # Cell [0, 6]: reward size equals the number of set flags, and the
        # state's outgoing transitions are zeroed.
        for flags in all_flags:
            terminal = self.get_state_nr([0, 6], list(flags))
            reward_sizes[terminal] = sum(flags)
            transition_cpt[terminal] *= 0
        DPNetPop.__init__(self, transition_cpt, reward_cpt, pop_size,
                          reward_sizes, set_W)