def __init__(self, field_size, ball_speed=1, random_start=True):
    """Build the state space, discount factor and start distribution.

    field_size: side length n of the square playing field.
    ball_speed: stored on the instance and used to scale the horizon
        h = n * ball_speed from which the discount factor is derived.
    random_start: when true, the ball's starting row is uniform over
        all n rows; otherwise the ball starts in row int(n / 2).
    """
    # image space is n by n
    self.q = None
    self.n = field_size
    horizon = self.n * ball_speed
    self.discount_factor = (horizon - 1.0) / horizon
    self.ball_speed = ball_speed

    # state space is: ball position and velocity, paddle position
    # and velocity
    #  - ball position is n by n
    #  - ball velocity is one of (-1, -1), (-1, 1), (0, -1), (0, 1),
    #    (1, -1), (1, 1)
    #  - paddle position is n; this is location of bottom of paddle,
    #    can stick "up" out of the screen
    #  - paddle velocity is one of 1, 0, -1
    cells = range(self.n)
    ball_positions = [(br, bc) for br in cells for bc in cells]
    ball_velocities = [(brv, bcv) for brv in (-1, 0, 1) for bcv in (-1, 1)]
    self.states = [
        (ball_pos, ball_vel, pp, pv)
        for ball_pos in ball_positions
        for ball_vel in ball_velocities
        for pp in cells
        for pv in (-1, 0, 1)
    ]
    # Extra terminal state, in addition to the tuple-valued states above.
    self.states.append('over')

    # Every start state has the ball in column 0 with velocity (0, 1) and
    # the paddle at position 0 with velocity 0; only the ball's row varies.
    if random_start:
        self.start = dist.uniform_dist(
            [((br, 0), (0, 1), 0, 0) for br in cells])
    else:
        self.start = dist.delta_dist(((int(self.n / 2), 0), (0, 1), 0, 0))
def __init__(self, states, actions, transition_model, reward_fn,
             discount_factor=1.0, start_dist=None):
    """Record the components that define the decision process.

    states: the collection of states.
    actions: the collection of actions.
    transition_model: stored as-is (its call signature is not visible
        from this constructor).
    reward_fn: stored as-is (its call signature is not visible from
        this constructor).
    discount_factor: stored as-is; defaults to 1.0.
    start_dist: distribution over initial states. When it is omitted
        (or is otherwise falsy) a uniform distribution over `states`
        is used instead.
    """
    self.states = states
    self.actions = actions
    self.transition_model = transition_model
    self.reward_fn = reward_fn
    self.discount_factor = discount_factor
    # Fall back to a uniform start only when no usable one was supplied.
    self.start = start_dist or uniform_dist(states)
def __init__(self, grid_size, stride_factor=1, random_start=True):
    """Build the grid state space and the start distribution.

    grid_size: side length n of the square grid.
    stride_factor: stored as self.stride.
    random_start: when true, the second position's y-coordinate is
        uniform over all n values at the start; otherwise it starts at
        int(n / 2). Its x-coordinate starts at int(n / 2) either way,
        and the first position always starts at (0, 0).
    """
    self.q = None
    self.n = grid_size
    self.actions = ['up', 'down', 'left', 'right']
    self.discount_factor = 1
    self.stride = stride_factor

    # A state is a pair of grid positions ((px, py), (rx, ry)); enumerate
    # every combination, plus the terminal state 'over'.
    coords = range(self.n)
    positions = [(x, y) for x in coords for y in coords]
    self.states = [(first, second)
                   for first in positions
                   for second in positions]
    self.states.append('over')

    middle = int(self.n / 2)
    if not random_start:
        self.start = dist.delta_dist(((0, 0), (middle, middle)))
    else:
        self.start = dist.uniform_dist(
            [((0, 0), (middle, ry)) for ry in coords])
def epsilon_greedy(q, s, eps=0.5):
    """Choose an action for state s: random with probability eps, else greedy.

    q: Q-function object exposing an `actions` attribute.
    s: the state to act in.
    eps: exploration threshold compared against random.random().

    >>> q = TabularQ([0,1,2,3],['b','c'])
    >>> q.set(0, 'b', 5)
    >>> q.set(0, 'c', 10)
    >>> q.set(1, 'b', 2)
    >>> eps = 0.
    >>> epsilon_greedy(q, 0, eps) #greedy
    'c'
    >>> epsilon_greedy(q, 1, eps) #greedy
    'b'
    """
    explore = random.random() < eps
    if not explore:
        return greedy(q, s)
    action_dist = uniform_dist(q.actions)
    return action_dist.draw()
def epsilon_greedy(q, s, eps=0.5):
    """Return an action for state s using an epsilon-greedy rule.

    q: Q-function object exposing an `actions` attribute.
    s: the state to act in.
    eps: exploration threshold; a uniformly random action is returned
        when random.random() < eps, otherwise the greedy action.

    >>> q = TabularQ([0,1,2,3],['b','c'])
    >>> q.set(0, 'b', 5)
    >>> q.set(0, 'c', 10)
    >>> q.set(1, 'b', 2)
    >>> eps = 0.
    >>> epsilon_greedy(q, 0, eps) #greedy
    'c'
    >>> epsilon_greedy(q, 1, eps) #greedy
    'b'
    """
    if random.random() < eps:  # True with prob eps, random action
        # Explore over the Q-function's own action set so the choice is
        # valid for any q, not only for one whose actions are 'b' and 'c'.
        return uniform_dist(q.actions).draw()
    return greedy(q, s)
def epsilon_greedy(q, s, eps=0.5):
    """Return a uniformly random action with probability eps, else greedy(q, s).

    q: Q-function object exposing an `actions` attribute.
    s: the state to act in.
    eps: exploration threshold compared against random.random().
    """
    # random.random() is drawn exactly once per call.
    explore = random.random() < eps
    return uniform_dist(q.actions).draw() if explore else greedy(q, s)
def epsilon_greedy(q, s, eps=0.5):
    """Pick an action for state s, exploring when random.random() < eps.

    q: Q-function object exposing an `actions` attribute.
    s: the state to act in.
    eps: exploration threshold; below it a uniform draw over q.actions
        is returned, otherwise the result of greedy(q, s).
    """
    exploring = random.random() < eps
    if exploring:
        # Exploration branch: ignore the Q-values and sample uniformly.
        random_action = uniform_dist(q.actions).draw()
        return random_action
    # Exploitation branch: defer to the greedy choice.
    return greedy(q, s)
def transition_model(self, s, a):
    """Return the distribution over next states for action a in state s.

    s: either the terminal state 'over' or a pair ((px, py), (rx, ry))
        holding the player position and the reward position.
    a: one of 'up', 'down', 'left', 'right'.

    The player moves two cells in the chosen direction and the reward
    moves self.stride cells in one of the four directions; both are
    clamped to the range [0, self.n - 1].

    Returns dist.delta_dist('over') when s is already 'over' or when the
    reward's current x-coordinate equals the player's new x-coordinate;
    otherwise dist.uniform_dist over the distinct candidate next states.

    Raises ValueError if a is not a recognised action (and s is not
    'over').
    """
    if s == 'over':
        # The terminal state is absorbing, whatever the action.
        return dist.delta_dist('over')

    (px, py), (rx, ry) = s
    upper = self.n - 1  # largest valid coordinate on either axis

    # Player movement: a fixed two-cell step, clamped to the grid.
    player_step = 2
    if a == 'up':
        new_px, new_py = px, min(py + player_step, upper)
    elif a == 'down':
        new_px, new_py = px, max(py - player_step, 0)
    elif a == 'left':
        new_px, new_py = max(px - player_step, 0), py
    elif a == 'right':
        new_px, new_py = min(px + player_step, upper), py
    else:
        # Fail here with a clear message instead of hitting an unbound
        # local further down.
        raise ValueError('unknown action: %r' % (a,))

    # NOTE(review): termination compares only the x-coordinates, and uses
    # the reward's position before it moves -- confirm this is intended.
    if rx == new_px:
        return dist.delta_dist('over')

    # Reward movement: one candidate per direction. Clamping at a wall can
    # make two candidates coincide; the set collapses such duplicates
    # before the uniform distribution is built.
    stride = self.stride
    player = (new_px, new_py)
    candidates = {
        (player, (rx, min(ry + stride, upper))),  # up
        (player, (rx, max(ry - stride, 0))),      # down
        (player, (max(rx - stride, 0), ry)),      # left
        (player, (min(rx + stride, upper), ry)),  # right
    }
    return dist.uniform_dist(list(candidates))