def cost_to_go(s, z, finish, walls):
    """Return the cost-to-go Q(s, z) for choosing velocity ``z`` in state ``s``.

    The result is ``1`` plus the weighted sum of ``V[s_next]`` over every
    successor yielded by ``next_possible_states_with_z(s, z)``.

    Side effect: ``V[s_next]`` is overwritten with ``100`` for a successor
    that is not a goal and has velocity ``(0, 0)``, and for a successor whose
    move from ``s[0]`` is reported as a crash by ``racetrack.crash``.

    :param s: current state; ``s[0]`` is the ``(x, y)`` location
    :param z: velocity ``(u, v)`` to apply
    :param finish: finish line, forwarded to ``racetrack.goal_test``
    :param walls: walls, forwarded to ``racetrack.crash``
    """
    u, v = z
    origin = s[0]

    # Location that would be reached if no steering error occurred.
    x_correct = origin[0] + u
    y_correct = origin[1] + v

    # Per-axis probability mass: weight of landing on the intended coordinate
    # versus landing on any other coordinate.
    p_x_hit = 0.6 if abs(u) > 1 else 1.0
    p_y_hit = 0.6 if abs(v) > 1 else 1.0
    p_miss = 0.2

    expected = 0
    for (x_next, y_next), z_next in next_possible_states_with_z(s, z):
        x_factor = p_x_hit if x_next == x_correct else p_miss
        y_factor = p_y_hit if y_next == y_correct else p_miss
        weight = x_factor * y_factor

        s_next = ((x_next, y_next), z_next)

        # Penalize standing still anywhere other than a goal state.
        if not racetrack.goal_test(s_next, finish) and z_next == (0, 0):
            V[s_next] = 100
        # NOTE(review): the source layout was flattened; the crash check is
        # assumed to apply to every successor, not only non-goal ones - confirm.
        if racetrack.crash((origin, s_next[0]), walls):
            V[s_next] = 100

        expected += weight * V[s_next]

    return 1 + expected
def leaves_not_goal(s0, policy, finish):
    """Return the set of non-goal leaf states reachable from ``s0``.

    A leaf is a member of ``transitive_closure(s0, policy)`` that does not
    appear when iterating ``policy``. Leaves for which
    ``racetrack.goal_test(leaf, finish)`` is true are excluded.

    :param s0: state the closure is computed from
    :param policy: policy forwarded to ``transitive_closure``
    :param finish: finish line, forwarded to ``racetrack.goal_test``
    """
    leaves = transitive_closure(s0, policy) - set(policy)
    return {leaf for leaf in leaves if not racetrack.goal_test(leaf, finish)}
def UCT(s, depth):
    """Run one UCT rollout from state ``s`` and return the sampled cost.

    Reads the module-level ``fline`` and ``g_walls`` and updates the
    module-level tables ``Q`` (running mean cost per ``(state, action)``),
    ``t`` (visit counts per state and per ``(state, action)``) and ``seen``
    (states whose tables have been initialised).

    :param s: current state ``(location, velocity)``
    :param depth: remaining rollout depth
    :return: ``-50`` at a goal state, the ``h_walldist`` heuristic when
        ``depth`` is 0, ``200`` when no action is applicable, otherwise the
        cost sampled for the chosen action (``50`` on a crash, else ``1`` plus
        the recursive rollout cost)
    """
    global fline, g_walls, Q, t, seen

    if racetrack.goal_test(s, fline):
        return -50
    if depth == 0:
        return heuristics.h_walldist(s, fline, g_walls)

    # Compute the applicable actions once; the list is reused below.
    actions = applicable(s)
    # No applicable velocities at this state means we are going to crash.
    if not actions:
        return 200

    if s not in seen:
        seen.append(s)
        t[s] = 0
        for z in actions:
            Q[(s, z)] = 0
            t[(s, z)] = 0

    untried = [z for z in actions if t[(s, z)] == 0]
    if untried:
        z_prime = random.choice(untried)
    else:
        # After experimentation, an exploration constant k = 6 seemed to
        # converge better.
        log_visits = math.log(t[s])
        # min() returns the first minimal element, matching a strict "<" scan.
        z_prime = min(
            actions,
            key=lambda z: Q[(s, z)] - 6 * math.sqrt(log_visits / t[(s, z)]),
        )

    loc, _velocity = s
    error = supervisor.steering_error(z_prime[0], z_prime[1])
    new_loc = (loc[0] + z_prime[0] + error[0], loc[1] + z_prime[1] + error[1])
    new_state = (new_loc, z_prime)

    if racetrack.crash((loc, new_loc), g_walls):
        cost = 50  # crash penalty
    else:
        cost = 1 + UCT(new_state, depth - 1)

    # Read the visit count only after the recursive call: the rollout may
    # revisit ``s`` and update the same table entries.
    visits = t[(s, z_prime)]
    Q[(s, z_prime)] = (visits * Q[(s, z_prime)] + cost) / (1 + visits)
    t[s] = t[s] + 1
    t[(s, z_prime)] = visits + 1
    return cost
def route(state, f_line, walls):
    """
    Search for a path from ``state`` with ``gsr.search`` using the "a*" strategy.

    :param state: The current state
    :param f_line: The finish line of the arena
    :param walls: The walls in the arena
    :return: whatever ``gsr.search`` returns
    """

    def is_goal(node):
        # Goal predicate bound to this arena's finish line.
        return racetrack.goal_test(node, f_line)

    def successors(node):
        # Every successor is paired with a cost of 1.
        return [(succ, 1) for succ in racetrack.next_states(node, walls)]

    def estimate(node):
        # Heuristic value of a state for this arena.
        return h_h2(node, f_line, walls)

    return gsr.search(state, successors, is_goal, "a*", estimate, 0)
def ExpectiMin(state, depth, player):
    """Evaluate ``state`` with a depth-limited expecti-min search.

    Reads the module-level ``f_line`` and ``g_walls`` and records the value
    computed for ``state`` in the module-level ``t_table``.

    :param state: state to evaluate
    :param depth: remaining search depth
    :param player: ``MIN`` to minimise over ``racetrack.next_states``, or
        ``CHANCE`` to take the probability-weighted sum over
        ``possible_state_with_error(state)``
    :return: ``0`` at a goal state; the ``h_walldist`` heuristic when
        ``next_states`` returns ``None`` or ``depth`` is 0; the minimum child
        value for ``MIN``; the weighted sum for ``CHANCE`` (``math.inf`` when
        that sum is 0); ``None`` for any other ``player`` value
    """
    global f_line, g_walls, t_table

    # A goal state costs nothing.
    if racetrack.goal_test(state, f_line):
        t_table[state] = 0
        return 0

    successors = racetrack.next_states(state, g_walls)

    if successors is None or depth == 0:
        # Terminal node or search-depth leaf: fall back to the heuristic.
        score = heuristics.h_walldist(state, f_line, g_walls)
    elif player == MIN:
        # MIN picks the chance-node child with the smallest value.
        score = min(
            (ExpectiMin(child, depth - 1, CHANCE) for child in successors),
            default=math.inf,
        )
    elif player == CHANCE:
        # CHANCE sums probability-of-child * value-of-child.
        score = 0
        for child in possible_state_with_error(state):
            score += probability(state, child) * ExpectiMin(
                child, depth - 1, MIN)
        if score == 0:
            score = math.inf
    else:
        # Unknown player: nothing is cached and None is returned.
        return None

    # Cache exactly the value being returned. Previously the CHANCE branch
    # stored math.inf here even when it returned a finite score.
    t_table[state] = score
    return score