Code Example #1
 def __init__(self, field_size, ball_speed=1, random_start=True):
     # image space is n by n
     self.q = None
     self.n = field_size
     # choose gamma = (h - 1)/h so the effective horizon 1/(1 - gamma)
     # is about h steps
     h = self.n * ball_speed
     self.discount_factor = (h - 1.0) / h
     self.ball_speed = ball_speed
     # state space is: ball position and velocity, paddle position
     # and velocity
     # - ball position is n by n
     # - ball velocity is one of (-1, -1), (-1, 1), (0, -1), (0, 1),
     #                          (1, -1), (1, 1)
     # - paddle position is n; this is location of bottom of paddle,
     #    can stick "up" out of the screen
     # - paddle velocity is one of 1, 0, -1
     self.states = [((br, bc), (brv, bcv), pp, pv)
                    for br in range(self.n)
                    for bc in range(self.n)
                    for brv in (-1, 0, 1)
                    for bcv in (-1, 1)
                    for pp in range(self.n)
                    for pv in (-1, 0, 1)]
     self.states.append('over')
     if random_start:
         self.start = dist.uniform_dist([((br, 0), (0, 1), 0, 0)
                                         for br in range(self.n)])
     else:
         self.start = dist.delta_dist(((int(self.n / 2), 0), (0, 1), 0, 0))
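Example #1 above (and Examples #3 and #8 below) call into a small dist helper module: dist.uniform_dist and dist.delta_dist return distribution objects with a draw() method. That module isn't shown on this page; the following is a minimal sketch consistent with how the snippets use it, not necessarily the course's actual implementation.

import random

class DDist:
    """Discrete distribution over hashable outcomes."""
    def __init__(self, d):
        self.d = d                          # dict: outcome -> probability

    def prob(self, elt):
        return self.d.get(elt, 0.0)

    def draw(self):
        # sample an outcome by inverting the cumulative distribution
        r = random.random()
        total = 0.0
        for outcome, p in self.d.items():
            total += p
            if r <= total:
                return outcome
        return outcome                      # guard against float round-off

def uniform_dist(elts):
    """Uniform distribution over a list of outcomes."""
    p = 1.0 / len(elts)
    return DDist({e: p for e in elts})

def delta_dist(v):
    """Distribution placing all probability on a single outcome."""
    return DDist({v: 1.0})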
Code Example #2
File: mdp.py Project: wesenu/MIT6.036
 def __init__(self, states, actions, transition_model, reward_fn,
              discount_factor=1.0, start_dist=None):
     self.states = states                        # list of hashable states
     self.actions = actions                      # list of actions
     self.transition_model = transition_model    # (s, a) -> dist over s'
     self.reward_fn = reward_fn                  # (s, a) -> scalar reward
     self.discount_factor = discount_factor
     self.start = start_dist if start_dist else uniform_dist(states)
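For reference, here is how this constructor might be called on a toy problem. The class name MDP and the two-state chain are assumptions for illustration; the signatures (transition_model(s, a) returning a distribution, reward_fn(s, a) returning a number) follow how the other snippets use these fields.

# Hypothetical two-state chain, for illustration only.
states = ['A', 'B']
actions = ['stay', 'go']

def transition_model(s, a):
    # 'stay' keeps the current state; 'go' switches states
    if a == 'stay':
        return delta_dist(s)
    return delta_dist('B' if s == 'A' else 'A')

def reward_fn(s, a):
    # reward 1 for being in state 'B'
    return 1.0 if s == 'B' else 0.0

chain = MDP(states, actions, transition_model, reward_fn,
            discount_factor=0.9)
print(chain.start.draw())   # uniform over states when start_dist is None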
Code Example #3
 def __init__(self, grid_size, stride_factor=1, random_start=True):
     self.q = None
     self.n = grid_size
     self.actions = ['up', 'down', 'left', 'right']
     self.discount_factor = 1
     self.stride = stride_factor
     self.states = [((px, py), (rx, ry))
                    for px in range(self.n)
                    for py in range(self.n)
                    for rx in range(self.n)
                    for ry in range(self.n)]
     self.states.append('over')
     if random_start:
         self.start = dist.uniform_dist([((0, 0), (int(self.n / 2), ry))
                                         for ry in range(self.n)])
     else:
         self.start = dist.delta_dist(
             ((0, 0), (int(self.n / 2), int(self.n / 2))))
Code Example #4
File: mdp10.py Project: wesenu/MIT6.036
def epsilon_greedy(q, s, eps=0.5):
    """ Return an action.

    >>> q = TabularQ([0,1,2,3],['b','c'])
    >>> q.set(0, 'b', 5)
    >>> q.set(0, 'c', 10)
    >>> q.set(1, 'b', 2)
    >>> eps = 0.
    >>> epsilon_greedy(q, 0, eps) #greedy
    'c'
    >>> epsilon_greedy(q, 1, eps) #greedy
    'b'
    
    """
    if random.random() < eps:
        return uniform_dist(q.actions).draw()
    else:
        return greedy(q, s)
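The doctest relies on TabularQ, greedy, and uniform_dist, none of which are shown on this page. A minimal sketch that makes the doctest pass might look like the following; greedy breaking ties by action order is an assumption, and uniform_dist is the dist sketch from earlier.

import random

class TabularQ:
    """Table-backed Q-function over finite states and actions."""
    def __init__(self, states, actions):
        self.actions = actions
        self.q = {(s, a): 0.0 for s in states for a in actions}

    def get(self, s, a):
        return self.q[(s, a)]

    def set(self, s, a, v):
        self.q[(s, a)] = v

def greedy(q, s):
    """Return the action with the highest Q-value in state s."""
    return max(q.actions, key=lambda a: q.get(s, a))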
Code Example #5
File: mdp.py Project: wesenu/MIT6.036
def epsilon_greedy(q, s, eps = 0.5):
    """ Return an action.

    >>> q = TabularQ([0,1,2,3],['b','c'])
    >>> q.set(0, 'b', 5)
    >>> q.set(0, 'c', 10)
    >>> q.set(1, 'b', 2)
    >>> eps = 0.
    >>> epsilon_greedy(q, 0, eps) #greedy
    'c'
    >>> epsilon_greedy(q, 1, eps) #greedy
    'b'
    
    """
    ddis = uniform_dist(q.actions)  # uniform over all of q's actions
    if random.random() < eps:  # True with prob eps, random action
        return ddis.draw()
    else:
        return greedy(q, s)
Code Example #6
def epsilon_greedy(q, s, eps=0.5):
    if random.random() < eps:  # True with prob eps, random action
        return uniform_dist(q.actions).draw()
    else:
        return greedy(q, s)
Code Example #7
def epsilon_greedy(q, s, eps=0.5):
    if random.random() < eps:  # True with prob eps, random action
        return uniform_dist(q.actions).draw()
    else:  # False with prob 1-eps, greedy action
        return greedy(q, s)
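A quick sanity check of the eps semantics, using the TabularQ/greedy sketch above: eps=0 always takes the greedy branch, while eps=1 always draws uniformly, so both actions should show up over repeated calls.

q = TabularQ([0, 1, 2, 3], ['b', 'c'])
q.set(0, 'b', 5)
q.set(0, 'c', 10)
assert epsilon_greedy(q, 0, eps=0.0) == 'c'     # greedy branch only
picks = {epsilon_greedy(q, 0, eps=1.0) for _ in range(200)}
assert picks == {'b', 'c'}                      # random branch: both appear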
Code Example #8
    def transition_model(self, s, a):

        if s == 'over':
            return dist.delta_dist('over')
        # unpack the state: agent position and reward position
        ((px, py), (rx, ry)) = s

        def clamp(v):
            # keep a coordinate inside the n-by-n grid
            return max(0, min(v, self.n - 1))

        # the agent moves two cells in the chosen direction, clipped at
        # the grid boundary
        new_px, new_py = px, py
        if a == 'up':
            new_py = clamp(py + 2)
        elif a == 'down':
            new_py = clamp(py - 2)
        elif a == 'left':
            new_px = clamp(px - 2)
        elif a == 'right':
            new_px = clamp(px + 2)

        # the reward moves one stride up, down, left, or right, also
        # clipped; the set collapses moves that clipping makes coincide
        reward_moves = {(rx, clamp(ry + self.stride)),
                        (rx, clamp(ry - self.stride)),
                        (clamp(rx - self.stride), ry),
                        (clamp(rx + self.stride), ry)}
        if rx == new_px:
            # the agent has reached the reward's column: episode ends
            return dist.delta_dist('over')
        return dist.uniform_dist([((new_px, new_py), r)
                                  for r in reward_moves])
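To see the model in action, here is a hypothetical walk-through. It assumes the class from Code Example #3 (called GridGame here purely for illustration) exposes this method, and it uses the dist sketch from earlier so the distribution's table can be printed.

game = GridGame(grid_size=5, stride_factor=1, random_start=False)
s0 = game.start.draw()            # ((0, 0), (2, 2)) with the delta start
d = game.transition_model(s0, 'up')
# the agent moves to (0, 2); the reward moves one step in each of four
# directions, so d is uniform over four successor states
for state, p in d.d.items():
    print(state, p)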