Ejemplo n.º 1
0
def run_rl():
    """Solve a small maze with policy iteration, then demo TD-learning,
    Q-learning and Sarsa on it, rendering as it goes.

    Blocks at the end waiting for the user to press enter.
    """
    walls = [5, 6, 13]  # indices of wall cells in the width*height grid
    height = 4
    width = 5
    m = build_maze(width, height, walls, hit=True)

    # policy_iteration_q returns a 4-tuple whose first element is the
    # Q-table (the other call site in this file unpacks it as q, _, _, _).
    # Passing the whole tuple to get_policy_from_q would be wrong, so
    # unpack explicitly here as well.
    q, _, _, _ = policy_iteration_q(m, render=False)
    pol = get_policy_from_q(q)
    print("TD-learning")
    temporal_difference(m, pol, render=True)
    print("Q-learning")
    q_learning(m, tau=6)
    print("Sarsa")
    sarsa(m, tau=6)
    input("press enter")
Ejemplo n.º 2
0
def create_maze(width, height, ratio):
    """Build a random solvable maze of the given dimensions.

    ``ratio`` is the fraction of the ``width * height`` cells turned into
    walls.  Candidate mazes are drawn repeatedly until value iteration
    assigns a non-null value to every state — if any state's value is
    null, the goal is unreachable from it and another maze is generated.
    """
    size = width * height
    n_walls = round(ratio * size)

    while True:
        wall_cells = random.sample(range(size), int(n_walls))
        candidate = build_maze(width, height, wall_cells)
        v, _, _, _ = value_iteration_v(candidate, render=False)
        # np.all(v) is falsy as soon as one state's value stayed at zero.
        if np.all(v):
            return candidate
Ejemplo n.º 3
0
def run_dyna_prog():
    """Time value iteration (V flavour) on a small maze MDP and print stats."""
    # BUG FIX: the maze layout below was entirely commented out, leaving
    # ``walls``, ``height`` and ``width`` undefined and making the
    # build_maze call raise NameError.  Use the same small layout as the
    # other demos in this file; the larger layout is kept as an alternative.
    walls = [5, 6, 13]
    height = 4
    width = 5
    # walls = [7, 8, 9, 10, 21,27,30,31,32,33,45, 46, 47]
    # height = 6
    # width = 9
    m = build_maze(width, height, walls)  # maze-like MDP definition
    # m = create_maze(10, 10, 0.2)
    m.render()
    # plot_convergence_vi_pi(m, False)

    print("value iteration V")
    cpt = Chrono()  # wall-clock timer; stopped (and reported) below
    q, _, nbIter, nbUd = value_iteration_v(m, render=0)
    print(nbIter, nbUd)  # number of iterations and of elementary updates
    print(len(q))
    cpt.stop()
Ejemplo n.º 4
0
def run_rl():
    """Solve a small maze with policy iteration, then plot a Q-learning /
    Sarsa comparison via plot_ql_sarsa_para.
    """
    maze = build_maze(5, 4, [5, 6, 13], hit=True)  # width=5, height=4
    # maze = create_maze(8, 8, 0.2)

    print("1")
    q_table, _, _, _ = policy_iteration_q(maze, render=0)
    print("1")
    policy = get_policy_from_q(q_table)

    # Earlier demos (TD-learning, epsilon-greedy Q-learning) were disabled:
    # print("TD-learning")
    # temporal_difference(maze, policy, render=True)
    # input("press enter")
    # print("Q-learning")
    # q_learning_eps(maze, tau=6)

    # Positional arguments follow plot_ql_sarsa_para's signature —
    # presumably (alpha, tau, nb_episodes, timeout, epsilon, render); TODO confirm.
    plot_ql_sarsa_para(maze, 0.001, 6, 1000, 50, 0.5, False)
Ejemplo n.º 5
0
def run_dyna_prog():
    """Run the four dynamic-programming solvers on one small maze,
    rendering each, then wait for the user to press enter.
    """
    # Alternative, larger layout:
    # walls = [14, 15, 16, 31, 45, 46, 47]; height = 6; width = 9
    walls = [5, 6, 13]
    height = 4
    width = 5

    maze = build_maze(width, height, walls)  # maze-like MDP definition

    # Same maze fed to every solver, announced by its label first.
    solvers = (
        ("value iteration V", value_iteration_v),
        ("value iteration Q", value_iteration_q),
        ("policy iteration Q", policy_iteration_q),
        ("policy iteration V", policy_iteration_v),
    )
    for label, solve in solvers:
        print(label)
        solve(maze, render=True)
    input("press enter")
Ejemplo n.º 6
0
from maze import build_maze
from room import Room

# Build the default maze and interactively explore it.
# NOTE(review): ``Room`` is imported but unused in the visible lines —
# presumably referenced further down the file; confirm before removing.
maze = build_maze()
maze.explore_maze()
Ejemplo n.º 7
0
from random import randint
from math import inf
from maze import build_maze, explore_maze, print_maze_paths, mow
from graph import dijkstras, a_star, bfs, quicksort
from colors import colors


# --- grid dimensions for the generated maze ---
grid_size_row = 15
# NOTE(review): "colmun" is a typo for "column"; left unchanged because the
# file continues beyond this excerpt and later code may use this exact name.
grid_size_colmun = 35

# Collectible items that build_maze scatters into maze cells ("swag").
swags = ['*', '+', 'candy corn', 'werewolf', 'pumpkin', 'apple', 'banana', 'diamond', 'melon', 'watermelon', 'kiwi']

# Build the maze: returns the grid plus the start and end cell coordinates.
grid, start_i, start_j, end_i, end_j = build_maze(grid_size_row, grid_size_colmun, swags)

# Render the freshly built maze (no path highlighted yet) and its parameters.
print( colors.CBLUE2 + "   ## Maze ## " + colors.CEND )
print_maze_paths(grid, None)
print("\nGrid size row: {0}, column: {1}".format(grid_size_row, grid_size_colmun))
print("Start point: {0},{1}\nEnd point: {2},{3}".format(start_i, start_j, end_i, end_j))

# Run Dijkstra's algorithm from the start cell, then render the maze again
# with the path stored for the end cell highlighted.
paths_and_distances, count = dijkstras(grid, start_i, start_j)
print("\n" + colors.CBLUE2 + "   ## Dijkstras ## " + colors.CEND)
print_maze_paths(grid, paths_and_distances[end_i][end_j][1])

# Debug only
#print("\ndistances for each cell:\n")
#for row in paths_and_distances: