import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

import mazemaking as mm
import sr

# simple 1D maze with an agent going right

# maze env
MAZE_LENGTH = 50  # p.8 of supplement;
# the paper said that 500 states were used for the 1D maze, which seems to be
# a typo: 50 states gives results similar to figure 2C.
SR_POINT = int(MAZE_LENGTH * 0.75)

maze = mm.Maze(x_length=MAZE_LENGTH)
dmaze = maze.make_1D()

# actions on the 1D maze
ACTION_LT = 0
ACTION_RT = 1
ACTIONS = [ACTION_LT, ACTION_RT]

# state information
START = [0, 0]
END = [0, MAZE_LENGTH - 1]

# hyperparameters for updating the SR matrix
alpha = 0.1
gamma = 0.84  # p.8 of supplement;
# the paper said that a gamma of 0.084 was used for the 1D maze, which also
# seems to be a typo.
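# The alpha/gamma pair above parameterizes the standard TD(0) update rule for
# the successor representation. The actual learning code presumably lives in
# the `sr` module, whose API is not shown here, so the function below is only
# a minimal sketch of that rule (name and signature are hypothetical):
#
#   M[s, :] <- M[s, :] + alpha * (1[s] + gamma * M[s_next, :] - M[s, :])

def sr_td_update(M, s, s_next, alpha, gamma):
    """One TD(0) step on SR row M[s] after observing the transition s -> s_next."""
    onehot = np.zeros(M.shape[0])
    onehot[s] = 1.0
    M[s] += alpha * (onehot + gamma * M[s_next] - M[s])
    return M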
import numpy as np
from tqdm import tqdm

import mazemaking as mm
import sr

# simple 1D maze with a preferred-direction or random policy

# maze env
MAZE_LENGTH = 300  # p.8 of supplement
SR_POINT = int(MAZE_LENGTH * 0.50)

# set xlim for the plots
X_LT = int(MAZE_LENGTH * 0.3)
X_RT = int(MAZE_LENGTH * 0.7)

maze = mm.Maze(x_length=MAZE_LENGTH)  # was hardcoded to 300; use the constant
dmaze = maze.make_1D()

# actions on the 1D maze (assumed to match the other 1D script)
ACTION_LT = 0
ACTION_RT = 1
ACTIONS = [ACTION_LT, ACTION_RT]

# state information
START = [0, 0]
END = [0, MAZE_LENGTH - 1]

# hyperparameters for updating the SR matrix
alpha = 0.1
gamma = 0.9  # p.8 of supplement

# action policy
def choose_action(state, prefered=True):
    if prefered:
        # The body of this branch was truncated in the source; the completion
        # below is an assumption: a right-biased policy that steps in the
        # preferred direction with probability 0.9, left otherwise.
        return ACTION_RT if np.random.rand() < 0.9 else ACTION_LT
    # random policy: either direction with equal probability
    return np.random.choice(ACTIONS)
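# For a fixed policy the SR also has a closed form, M = (I - gamma * T)^(-1),
# where T is the one-step state-transition matrix under that policy. The
# sketch below (not part of the original script; names are hypothetical)
# builds T for the right-biased walk assumed in choose_action and solves for
# M directly, which gives a convenient ground truth for the TD-learned estimate:

def analytic_sr(n_states, gamma, p_right=0.9):
    """Closed-form SR of a 1D track under a right-biased random walk."""
    T = np.zeros((n_states, n_states))
    for s in range(n_states):
        T[s, min(s + 1, n_states - 1)] += p_right   # step right (clamped at the wall)
        T[s, max(s - 1, 0)] += 1.0 - p_right        # step left (clamped at the wall)
    return np.linalg.inv(np.eye(n_states) - gamma * T)

# e.g. M_true = analytic_sr(MAZE_LENGTH, gamma); row s is the expected
# discounted future occupancy of every state when starting from s.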
import mazemaking as mm
import sr

# 2D square maze with a barrier

# maze env
MAZE_X_LENGTH = 40
MAZE_Y_LENGTH = 40

# barrier placement
B_LENGTH = 20
B_THICKNESS = 1
B_X_POSITION = 10
B_Y_POSITION = 19

maze = mm.Maze(x_length=MAZE_X_LENGTH, y_length=MAZE_Y_LENGTH,
               b_length=B_LENGTH, b_thickness=B_THICKNESS,
               b_x_position=B_X_POSITION, b_y_position=B_Y_POSITION)
square_with_barrier = maze.make_barrier_maze_square()

# Flattened-state indices in the row just past the barrier: SR_CELL sits near
# the middle of that row, RT_SR_CELL is its first cell and LT_SR_CELL its
# last. (The row stride uses MAZE_Y_LENGTH, which only works because the
# maze is square.)
SR_CELL = int(((B_Y_POSITION + B_THICKNESS + 1) * MAZE_Y_LENGTH) +
              ((MAZE_X_LENGTH / 2) - 1))
RT_SR_CELL = int(SR_CELL - ((MAZE_X_LENGTH / 2) - 1))
LT_SR_CELL = int(SR_CELL + (MAZE_X_LENGTH / 2))

# actions on the 2D maze
ACTIONS = [maze.ACTION_LT, maze.ACTION_RT, maze.ACTION_UP, maze.ACTION_DW]

# state information
START = [0, 0]
END = [MAZE_Y_LENGTH - 1, MAZE_X_LENGTH - 1]
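# A common way to inspect the effect of the barrier is to reshape one row of
# the learned SR matrix back onto the grid and plot it as a heatmap. The SR
# matrix itself comes out of the `sr` module, whose API is not shown here, so
# `M` below is a placeholder and this plotting helper is only a sketch:

import matplotlib.pyplot as plt

def plot_sr_field(M, cell, title=""):
    """Reshape the SR row of `cell` onto the maze grid and plot it as a heatmap."""
    field = M[cell].reshape(MAZE_Y_LENGTH, MAZE_X_LENGTH)
    plt.imshow(field, origin="lower")
    plt.colorbar()
    plt.title(title)
    plt.show()

# e.g. plot_sr_field(M, SR_CELL) for the cell just past the barrier, and
# plot_sr_field(M, RT_SR_CELL) / plot_sr_field(M, LT_SR_CELL) for the two
# ends of that row.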