def test():
    """Train Q-learning on a 13x12 grid and animate the resulting path.

    Builds a ``Grid`` whose start and finish sit on the middle row of the
    first and last columns, runs ``q_learning``, evaluates the learned
    table with ``eval_control``, prints the visited positions and hands
    them to ``Draw.Animate`` for display.

    If the evaluated path does not end on ``g.finish`` it is collapsed to
    just its first and last positions before being shown.
    """
    rows = 13
    cols = 12
    # NOTE(review): positions look like (row, col) tuples — confirm against Grid.
    start = (rows // 2, 0)
    # The row index is derived from `rows`; the previous `int(cols / 2)`
    # only matched by coincidence for these dimensions (both give 6).
    finish = (rows // 2, cols - 1)
    g = Grid(rows, cols, start, finish)

    # Per-episode step budget: twice the grid's half-perimeter.
    episode_len = 2 * (g.rows + g.cols)
    episode_num = 50000

    # The three leading numbers are passed positionally; their meaning is
    # defined by Grid.q_learning, which is not visible in this block.
    Q = g.q_learning(0.6, 0.1, 0.5, episode_len, episode_num)
    pos_list = g.eval_control(Q, episode_len)
    print(pos_list)

    # Path did not end on the finish: keep only its endpoints.
    if pos_list[-1] != g.finish:
        pos_list = [pos_list[0], pos_list[-1]]

    ani = Draw.Animate(g)
    ani.show(pos_list)
def test():
    """Run Q-learning corner-to-corner on a 16x19 grid and animate the result.

    The grid's start is ``(0, 0)`` and its finish is
    ``(rows - 1, cols - 1)``. After training, the learned table is
    evaluated with ``eval_control``; the visited positions are printed and
    passed to ``Draw.Animate(...).show``. A path that does not end on the
    grid's ``finish`` is reduced to its first and last positions first.
    """
    n_rows, n_cols = 16, 19
    grid = Grid(n_rows, n_cols, (0, 0), (n_rows - 1, n_cols - 1))

    # Step budget per episode equals the number of cells in the grid.
    max_steps = grid.rows * grid.cols
    n_episodes = 100000

    # The three leading numbers are positional hyperparameters whose meaning
    # is defined by Grid.q_learning (not visible from this block).
    q_table = grid.q_learning(0.6, 0.1, 0.5, max_steps, n_episodes)

    path = grid.eval_control(q_table, max_steps)
    print(path)

    # Keep the full path only when it actually ends on the finish position.
    reached_finish = not (path[-1] != grid.finish)
    shown = path if reached_finish else [path[0], path[-1]]

    Draw.Animate(grid).show(shown)