Example #1
def cost_to_go(s, z, finish, walls):
    """Calculate the cost-to-go Q(s, z).

    s: current state ((x, y), (u, v)); z: chosen velocity (u, v).
    Reads and updates the module-level value table V.
    """
    (u, v) = z
    # next position if the velocity is applied without steering error:
    (x_correct, y_correct) = (s[0][0] + u, s[0][1] + v)
    # probability mass function: a fast component (|u| > 1 or |v| > 1) lands on
    # the intended coordinate with probability 0.6 and on an erroneous one with
    # probability 0.2; a slow component is treated as exact.
    u_same = 0.6 if abs(u) > 1 else 1
    u_diff = 0.2
    v_same = 0.6 if abs(v) > 1 else 1
    v_diff = 0.2
    cost = 0
    for ((x_next, y_next), z_next) in next_possible_states_with_z(s, z):
        weight = ((u_same * (x_next == x_correct) +
                   u_diff * (x_next != x_correct)) *
                  (v_same * (y_next == y_correct) +
                   v_diff * (y_next != y_correct)))
        s_next = ((x_next,y_next),z_next)

        # penalize not moving if not at the goal state
        if not racetrack.goal_test(s_next,finish):
            if z_next == (0,0):
                V[s_next] = 100

        if racetrack.crash((s[0],s_next[0]), walls):
            V[s_next] = 100

        cost += weight * V[s_next]
    # unit step cost plus the expected cost-to-go of the successor states
    return 1 + cost
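
A minimal sketch (not part of the original snippet) of how cost_to_go could drive one synchronous value-iteration sweep. It assumes the same module-level value table V used above, plus a hypothetical applicable(s) helper that enumerates the legal velocities at a state (example #3 below uses a helper of that name).

def value_iteration_sweep(states, finish, walls):
    """One synchronous sweep over all states; returns the largest change to V."""
    delta = 0.0
    for s in states:
        # V(s) <- min over velocities z of the one-step cost-to-go Q(s, z)
        best = min(cost_to_go(s, z, finish, walls) for z in applicable(s))
        delta = max(delta, abs(V.get(s, 0) - best))
        V[s] = best
    return delta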
Example #2
def leaves_not_goal(s0, policy, finish):
    """Return the leaf states (reachable under the policy but not assigned a
    move by it) that are not goal states."""
    result = set()
    for s_leaf in transitive_closure(s0, policy) - set(policy):
        if not racetrack.goal_test(s_leaf,finish):
            result.add(s_leaf)
    return result
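
For context, a hypothetical sketch of the transitive_closure helper the function above relies on: the set of states reachable from s0 when the policy's velocity is applied at every state it covers, with stochastic outcomes enumerated by next_possible_states_with_z from example #1. This is an assumed reconstruction, not code from the rdchoe/school project.

def transitive_closure(s0, policy):
    """States reachable from s0 by following the policy (assumed reconstruction)."""
    reachable, frontier = {s0}, [s0]
    while frontier:
        s = frontier.pop()
        if s not in policy:          # a leaf: the policy prescribes no move here
            continue
        for s_next in next_possible_states_with_z(s, policy[s]):
            if s_next not in reachable:
                reachable.add(s_next)
                frontier.append(s_next)
    return reachable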
Example #3
File: proj2.py Project: rdchoe/school
def UCT(s, depth):
    global fline, g_walls, Q, t, seen
    if racetrack.goal_test(s, fline):
        return -50
    if depth == 0:
        return heuristics.h_walldist(s, fline, g_walls)

    # no applicable velocities at this state means the car is guaranteed to crash
    if not applicable(s):
        return 200
    if s not in seen:
        seen.append(s)
        t[s] = 0
        for z in applicable(s):
            Q[(s, z)] = 0
            t[(s, z)] = 0

    untried = [z for z in applicable(s) if t[(s, z)] == 0]
    if untried:
        z_prime = random.choice(untried)
    else:
        # UCB-style selection for cost minimization: average cost minus an
        # exploration bonus that grows for rarely tried moves. After
        # experimentation, an exploration constant of 6 seems to converge better.
        moves = applicable(s)
        scores = [Q[(s, z)] - 6 * math.sqrt(math.log(t[s]) / t[(s, z)])
                  for z in moves]
        z_prime = moves[scores.index(min(scores))]

    (loc, _) = s

    # apply the chosen velocity, perturbed by the supervisor's steering error
    error = supervisor.steering_error(z_prime[0], z_prime[1])

    new_state = ((loc[0] + z_prime[0] + error[0],
                  loc[1] + z_prime[1] + error[1]), z_prime)

    if racetrack.crash((loc, new_state[0]), g_walls):
        cost = 50  # crash penalty
    else:
        cost = 1 + UCT(new_state, depth - 1)
    Q[(s, z_prime)] = (t[(s, z_prime)] * Q[(s, z_prime)] +
                       cost) / (1 + t[(s, z_prime)])
    t[s] = t[s] + 1
    t[(s, z_prime)] = t[(s, z_prime)] + 1
    return cost
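
A minimal usage sketch (assumed, not from proj2.py): run repeated rollouts from the current state, then commit to the velocity whose averaged cost estimate Q[(s, z)] is lowest. It relies on the same globals (Q, t, seen) and the applicable helper that UCT itself uses; the rollout and depth counts are only illustrative.

def choose_velocity(s, rollouts=1000, depth=10):
    """Pick the lowest-estimated-cost velocity for state s after Monte Carlo rollouts."""
    for _ in range(rollouts):
        UCT(s, depth)
    # every applicable move was initialized in Q the first time s was expanded
    return min(applicable(s), key=lambda z: Q[(s, z)])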
Example #4
def route(state, f_line, walls):
    """
    Find a path from the current state to the finish line using A* search.

    :param state:       The current state
    :param f_line:      The finish line of the arena
    :param walls:       The walls in the arena
    """

    # Goal test, successor generator (unit step cost), and heuristic for A*
    goal = lambda state: racetrack.goal_test(state, f_line)
    succ = lambda state: [(n, 1) for n in racetrack.next_states(state, walls)]
    h = lambda state: h_h2(state, f_line, walls)

    # run gsr.py's generic search routine with the A* strategy
    return gsr.search(state, succ, goal, "a*", h, 0)
Example #5
File: proj3.py Project: rdchoe/school
def ExpectiMin(state, depth, player):
    global f_line, g_walls, t_table
    #If state is at goal, return 0
    if racetrack.goal_test(state, f_line):
        t_table[state] = 0
        return 0
    # if there are no successor states, we are at a terminal node: return the heuristic value
    if racetrack.next_states(state, g_walls) is None:
        score = heuristics.h_walldist(state, f_line, g_walls)
        t_table[state] = score
        return score
    # if depth is 0 we are at a leaf node and return the heuristic value of that state
    elif depth == 0:
        score = heuristics.h_walldist(state, f_line, g_walls)
        #print('LEAF:', score, state)
        t_table[state] = score
        return score
    elif player == MIN:  # MIN's turn: select the chance-node child with the minimum score
        score = math.inf
        for child in racetrack.next_states(state, g_walls):
            score = min(score, ExpectiMin(child, depth - 1, CHANCE))
        t_table[state] = score
        return score
    # on a chance node, take the expectation: sum over children of the
    # probability of reaching the child times the recursive value of the child
    elif player == CHANCE:
        score = 0
        possible_error_states = possible_state_with_error(state)
        for child in possible_error_states:
            score += probability(state, child) * ExpectiMin(
                child, depth - 1, MIN)
        #print(score)
        if score == 0:
            t_table[state] = math.inf
            return math.inf
        else:
            # cache the computed expectation before returning it
            t_table[state] = score
            return score
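
For reference, a hypothetical probability(state, child) consistent with the error model in example #1: a fast velocity component (magnitude greater than 1) lands on the intended coordinate with probability 0.6 and on an erroneous one with probability 0.2, while a slow component is deterministic. This is an assumed reconstruction, not the project's actual helper.

def probability(state, child):
    """Probability of reaching child from state under the assumed error model."""
    ((x, y), (u, v)) = state
    ((x_next, y_next), _) = child
    if abs(u) > 1:
        px = 0.6 if x_next == x + u else 0.2
    else:
        px = 1.0 if x_next == x + u else 0.0
    if abs(v) > 1:
        py = 0.6 if y_next == y + v else 0.2
    else:
        py = 1.0 if y_next == y + v else 0.0
    return px * py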