def transition_model(self, s, a):
        """Return the distribution over successor states for state ``s`` and action ``a``.

        ``s`` is a 4-tuple ``(itmp, ttmp, actmp, acst)``.  The first entry is
        relaxed towards ``actmp`` when the last entry is truthy and towards
        ``etmp`` otherwise; ``a`` is added to ``actmp``; the second entry is
        carried over unchanged.  Both updated values are clamped with
        ``bound(..., minT, maxT)`` and passed through ``r_stvar``.

        ``etmp``, ``minT``, ``maxT``, ``bound`` and ``r_stvar`` are resolved
        from the enclosing scope.
        NOTE(review): the names suggest indoor / target / AC temperature and
        an AC on-off flag -- confirm against the model definition.
        """
        inside, ttmp, setpoint, ac_on = s

        # Weight kept on the current value and the value it relaxes towards:
        # 0.5 towards the third state entry when the flag is set,
        # 0.9 towards etmp when it is not.
        keep, pull = (0.5, setpoint) if ac_on else (0.9, etmp)
        inside = keep * inside + (1 - keep) * pull

        setpoint = setpoint + a
        # Any non-zero action counts as a request for the flag to be set.
        wants_on = (a != 0)

        inside = r_stvar(bound(inside, minT, maxT))
        setpoint = r_stvar(bound(setpoint, minT, maxT))

        # Requested flag already matches the current one: deterministic step.
        if wants_on == ac_on:
            return dist.delta_dist((inside, ttmp, setpoint, ac_on))

        # Otherwise the flag switches with probability 0.5 and stays with
        # the remaining probability.
        switch_prob = 0.5
        unchanged = (inside, ttmp, setpoint, ac_on)
        switched = (inside, ttmp, setpoint, wants_on)
        return dist.DDist({unchanged: 1 - switch_prob, switched: switch_prob})
Example #2
0
 def __init__(self, field_size, ball_speed=1, random_start=True):
     """Set up the state space, discount factor and start distribution.

     field_size   -- side length n of the n-by-n image space
     ball_speed   -- multiplier stored on the instance and used, together
                     with n, to derive the discount factor
     random_start -- if truthy the ball starts in a uniformly chosen row of
                     column 0; otherwise it starts in row int(n / 2)
     """
     self.q = None
     self.n = field_size
     horizon = self.n * ball_speed
     self.discount_factor = (horizon - 1.0) / horizon
     self.ball_speed = ball_speed

     # A state is ((ball row, ball col), (ball row vel, ball col vel),
     #             paddle position, paddle velocity):
     # - ball position ranges over the n by n field
     # - ball row velocity is -1, 0 or 1; ball column velocity is -1 or 1
     # - paddle position ranges over n values; it is the location of the
     #   bottom of the paddle, which can stick "up" out of the screen
     # - paddle velocity is 1, 0 or -1
     cells = range(self.n)
     speeds = (-1, 0, 1)
     self.states = [
         ((row, col), (row_vel, col_vel), paddle, paddle_vel)
         for row in cells
         for col in cells
         for row_vel in speeds
         for col_vel in (-1, 1)
         for paddle in cells
         for paddle_vel in speeds
     ]
     # Extra terminal state.
     self.states.append('over')

     # Every start state has the ball in column 0 with velocity (0, 1) and
     # the paddle at position 0 with velocity 0.
     if random_start:
         self.start = dist.uniform_dist(
             [((row, 0), (0, 1), 0, 0) for row in cells])
     else:
         self.start = dist.delta_dist(
             ((int(self.n / 2), 0), (0, 1), 0, 0))
Example #3
0
    def transition_model(self, s, a, p=0.4):
        """Return the distribution over next states for state ``s`` and action ``a``.

        ``s`` is either 'over' (absorbing) or
        ``((br, bc), (brv, bcv), pp, pv)``.  ``a`` is the paddle action
        (-1, 0 or 1) and becomes the new paddle velocity.  ``p`` is the
        probability that the nominal post-bounce state is kept; the
        remaining mass goes to states with an altered ball velocity.
        Without a bounce the transition is deterministic.  If the ball
        leaves through the far column and ``self.paddle_hit`` is falsy the
        result is 'over' with certainty.
        """
        if s == 'over':
            return dist.delta_dist('over')

        (row, col), (row_vel, col_vel), paddle, _ = s

        # Nominal ball motion before any wall handling.
        speed = self.ball_speed
        next_row, next_row_vel = row + speed * row_vel, row_vel
        next_col, next_col_vel = col + speed * col_vel, col_vel

        # Paddle follows the action, clamped to [0, n - 1].
        next_paddle = max(0, min(self.n - 1, paddle + a))

        bounced_row = bounced_col = False

        # Row limits: clamp to the edge and point the row velocity inwards.
        if next_row < 0:
            next_row, next_row_vel, bounced_row = 0, 1, True
        elif next_row >= self.n:
            next_row, next_row_vel, bounced_row = self.n - 1, -1, True

        # Column limits: column 0 always bounces; the far side only bounces
        # when the paddle is hit, otherwise the episode ends.
        if next_col < 0:
            next_col, next_col_vel, bounced_col = 0, 1, True
        elif next_col >= self.n:
            if not self.paddle_hit(paddle, next_paddle, row, col,
                                   next_row, next_col):
                return dist.delta_dist('over')
            next_col, next_col_vel, bounced_col = self.n - 1, -1, True

        def state_with(rv, cv):
            # Successor state sharing position and paddle, with the given
            # ball velocity.
            return ((next_row, next_col), (rv, cv), next_paddle, a)

        nominal = state_with(next_row_vel, next_col_vel)

        if not (bounced_row or bounced_col):
            return dist.delta_dist(nominal)

        if bounced_col:
            # Column bounce (also covers a simultaneous row bounce): the
            # row velocity is the random component.
            if abs(next_row_vel) > 0:
                return dist.DDist({
                    nominal: p,
                    state_with(-next_row_vel, next_col_vel): 1 - p,
                })
            # Zero row velocity: split the remaining mass between -1 and 1.
            return dist.DDist({
                nominal: p,
                state_with(-1, next_col_vel): 0.5 * (1 - p),
                state_with(1, next_col_vel): 0.5 * (1 - p),
            })

        # Row bounce only: the column velocity is the random component.
        return dist.DDist({
            nominal: p,
            state_with(next_row_vel, -next_col_vel): 1 - p,
        })
Example #4
0
 def __init__(self, grid_size, stride_factor=1, random_start=True):
     """Set up actions, state space and start distribution for an n-by-n grid.

     grid_size     -- side length n of the grid
     stride_factor -- stored as ``self.stride``
     random_start  -- if truthy the second coordinate pair starts at
                      (int(n / 2), ry) with ry uniform over the grid;
                      otherwise it starts at (int(n / 2), int(n / 2)).
                      The first pair always starts at (0, 0).
     """
     self.q = None
     self.n = grid_size
     self.actions = ['up', 'down', 'left', 'right']
     self.discount_factor = 1
     self.stride = stride_factor

     # A state is ((px, py), (rx, ry)), each coordinate in 0..n-1, plus
     # the extra terminal state 'over'.
     axis = range(self.n)
     self.states = [
         ((px, py), (rx, ry))
         for px in axis
         for py in axis
         for rx in axis
         for ry in axis
     ]
     self.states.append('over')

     middle = int(self.n / 2)
     if random_start:
         start = dist.uniform_dist([((0, 0), (middle, ry)) for ry in axis])
     else:
         start = dist.delta_dist(((0, 0), (middle, middle)))
     self.start = start
    def __init__(self, start=(20*u, 25*u, 30*u, False)):
        """Set up states, actions and a deterministic start state.

        start -- the initial state, a tuple ``(itmp, ttmp, actmp, acst)``;
                 the start distribution puts all mass on it.
        """
        self.q = None
        self.discount_factor = 0.99

        # Every combination of three values in minT..maxT (maxT + 1 makes
        # the upper bound inclusive) and a boolean flag.  No terminal
        # 'over' state is added to this model.
        levels = range(minT, maxT + 1)
        flags = (True, False)
        self.states = [
            (itmp, ttmp, actmp, acst)
            for itmp in levels
            for ttmp in levels
            for actmp in levels
            for acst in flags
        ]

        # +1 / 0 / -1, plus the smallest positive float: a non-zero action
        # (so it keeps the AC on) that does not move the target.
        tiny_step = np.nextafter(0, 1)
        self.actions = [+1, 0, -1, tiny_step]

        self.start = dist.delta_dist(start)
Example #6
0
    def transition_model(self, s, a):
        """Return the distribution over next states for state ``s`` and action ``a``.

        ``s`` is either 'over' (absorbing) or ``((px, py), (rx, ry))``.
        ``a`` is one of 'up', 'down', 'left', 'right' and moves (px, py) by
        two cells, clamped to the grid.  Independently, (rx, ry) moves by
        ``self.stride`` cells in one of the four directions (clamped); the
        result is uniform over the distinct successor states.  If ``rx``
        equals the new ``px`` the result is 'over' with certainty.

        NOTE(review): the terminal test compares only the x-coordinates and
        uses ``rx`` from before its move -- confirm this is intended.
        """
        if s == 'over':
            return dist.delta_dist('over')

        (px, py), (rx, ry) = s
        last = self.n - 1

        # Move (px, py) two cells along the chosen axis, staying inside
        # the grid.  Any other action leaves new_px / new_py unassigned.
        if a == 'up':
            new_px, new_py = px, min(py + 2, last)
        elif a == 'down':
            new_px, new_py = px, max(py - 2, 0)
        elif a == 'left':
            new_px, new_py = max(px - 2, 0), py
        elif a == 'right':
            new_px, new_py = min(px + 2, last), py

        # Candidate moves of (rx, ry): up, down, left, right, each clamped.
        step = self.stride
        moved = (
            (rx, min(ry + step, last)),
            (rx, max(ry - step, 0)),
            (max(rx - step, 0), ry),
            (min(rx + step, last), ry),
        )

        mover = (new_px, new_py)
        candidates = [(mover, target) for target in moved]
        # Moves clamped at a border can coincide; keep each state once.
        successors = list(set(candidates))

        if rx == new_px:
            return dist.delta_dist('over')
        return dist.uniform_dist(successors)