def run_rl():
    """Run the TD-learning, Q-learning and Sarsa demos on a small maze.

    A maze of width 5 and height 4 with walls on cells 5, 6 and 13 is built,
    a policy is extracted from the result of policy iteration on Q, and the
    three learning algorithms are then run one after the other. The function
    finally waits for the user to press enter.
    """
    maze_width, maze_height = 5, 4
    wall_cells = [5, 6, 13]
    mdp = build_maze(maze_width, maze_height, wall_cells, hit=True)

    # Policy to be evaluated by TD-learning.
    q_values = policy_iteration_q(mdp, render=False)
    policy = get_policy_from_q(q_values)

    print("TD-learning")
    temporal_difference(mdp, policy, render=True)

    print("Q-learning")
    q_learning(mdp, tau=6)

    print("Sarsa")
    sarsa(mdp, tau=6)

    input("press enter")
def create_maze(width, height, ratio):
    """Generate a random maze that has a solution.

    Walls are drawn at random among the ``width * height`` cells, and the
    resulting maze is kept only if value iteration yields no null value;
    otherwise a new set of walls is drawn.

    Args:
        width: maze width, forwarded to ``build_maze``.
        height: maze height, forwarded to ``build_maze``.
        ratio: fraction of the ``width * height`` cells to turn into walls.

    Returns:
        The first generated maze whose value function (as returned by
        ``value_iteration_v``) contains no zero entry.

    Raises:
        ValueError: if ``ratio`` gives a negative wall count, or a wall count
            that leaves no free cell in a non-empty maze.
    """
    size = width * height
    n_walls = int(round(ratio * size))

    # A maze made only of walls can never have a solution, so the retry loop
    # below would never terminate: reject such requests up front.
    if n_walls < 0 or (size > 0 and n_walls >= size):
        raise ValueError(
            "ratio {!r} gives {} walls for {} cells; "
            "at least one cell must stay free".format(ratio, n_walls, size)
        )

    # Keep drawing wall layouts until value iteration reports no null value,
    # i.e. until the maze has a solution.
    while True:
        walls = random.sample(range(size), n_walls)
        m = build_maze(width, height, walls)
        v, _, _, _ = value_iteration_v(m, render=False)
        if np.all(v):
            return m
def run_dyna_prog():
    """Run value iteration on a random maze and print the counters it returns.

    The maze comes from ``create_maze(10, 10, 0.2)`` and is rendered before
    solving. The solver call is bracketed by a ``Chrono`` object
    (presumably a stopwatch -- confirm against its definition).
    """
    # Alternative hand-built layout, kept for reference:
    # walls = [7, 8, 9, 10, 21, 27, 30, 31, 32, 33, 45, 46, 47]
    # height = 6
    # width = 9
    # m = build_maze(width, height, walls)  # maze-like MDP definition

    mdp = create_maze(10, 10, 0.2)
    mdp.render()
    # plot_convergence_vi_pi(mdp, False)

    # print("value iteration V")
    timer = Chrono()
    values, _, n_iter, n_ud = value_iteration_v(mdp, render=0)
    print(n_iter, n_ud)
    print(len(values))
    timer.stop()
def run_rl():
    """Plot the Q-learning / Sarsa comparison on a small maze.

    A maze of width 5 and height 4 with walls on cells 5, 6 and 13 is built,
    policy iteration on Q is run on it, and ``plot_ql_sarsa_para`` is then
    called on the maze. The TD-learning and Q-learning demos are disabled.
    """
    maze_width, maze_height = 5, 4
    wall_cells = [5, 6, 13]
    mdp = build_maze(maze_width, maze_height, wall_cells, hit=True)
    # Alternative random maze: mdp = create_maze(8, 8, 0.2)

    # The two "1" prints bracket the policy-iteration call.
    print("1")
    q_values, _, _, _ = policy_iteration_q(mdp, render=0)
    print("1")
    # Only consumed by the disabled TD-learning demo below.
    pol = get_policy_from_q(q_values)

    # Disabled demos:
    # print("TD-learning")
    # temporal_difference(mdp, pol, render=True)
    # input("press enter")
    # print("Q-learning")
    # q_learning_eps(mdp, tau=6)

    plot_ql_sarsa_para(mdp, 0.001, 6, 1000, 50, 0.5, False)
def run_dyna_prog():
    """Run the four dynamic-programming solvers on a small maze.

    A maze of width 5 and height 4 with walls on cells 5, 6 and 13 is built;
    value iteration (V and Q) and policy iteration (Q and V) are then run on
    it with rendering enabled, each preceded by a printed title. The function
    finally waits for the user to press enter.
    """
    # Alternative layout, kept for reference:
    # walls = [14, 15, 16, 31, 45, 46, 47]
    # height = 6
    # width = 9
    maze_width, maze_height = 5, 4
    wall_cells = [5, 6, 13]
    mdp = build_maze(maze_width, maze_height, wall_cells)  # maze-like MDP definition

    # (title, solver) pairs, run in this order.
    solvers = (
        ("value iteration V", value_iteration_v),
        ("value iteration Q", value_iteration_q),
        ("policy iteration Q", policy_iteration_q),
        ("policy iteration V", policy_iteration_v),
    )
    for title, solve in solvers:
        print(title)
        solve(mdp, render=True)

    input("press enter")
from maze import build_maze
from room import Room

# Build a maze with build_maze's default arguments, then call its
# explore_maze() method.
maze = build_maze()
maze.explore_maze()
from random import randint
from math import inf
from maze import build_maze, explore_maze, print_maze_paths, mow
from graph import dijkstras, a_star, bfs, quicksort
from colors import colors

# Grid dimensions passed to build_maze.
grid_size_row = 15
grid_size_colmun = 35

# Swag items handed to build_maze.
swags = [
    '*',
    '+',
    'candy corn',
    'werewolf',
    'pumpkin',
    'apple',
    'banana',
    'diamond',
    'melon',
    'watermelon',
    'kiwi',
]

# Build the maze; build_maze also returns the start and end coordinates.
grid, start_i, start_j, end_i, end_j = build_maze(
    grid_size_row, grid_size_colmun, swags
)

# Render the bare maze (no path highlighted).
print(colors.CBLUE2 + " ## Maze ## " + colors.CEND)
print_maze_paths(grid, None)
print(
    "\nGrid size row: {0}, column: {1}".format(grid_size_row, grid_size_colmun)
)
print(
    "Start point: {0},{1}\nEnd point: {2},{3}".format(
        start_i, start_j, end_i, end_j
    )
)

# Run Dijkstra's algorithm from the start cell, then render the path stored
# for the end cell (index 1 of that cell's entry).
paths_and_distances, count = dijkstras(grid, start_i, start_j)
print("\n" + colors.CBLUE2 + " ## Dijkstras ## " + colors.CEND)
print_maze_paths(grid, paths_and_distances[end_i][end_j][1])

# Debug only
#print("\ndistances for each cell:\n")
#for row in paths_and_distances: