def get_cliff(gamma): ascii_room = """ ####### # !!! # # # # # # # #######"""[1:].split('\n') ascii_room = [row.strip() for row in ascii_room] # a = list(ascii_room[goal[0]]) # a[goal[1]] = "g" # ascii_room[goal[0]] = "".join(a) char_matrix = get_char_matrix(ascii_room) grid_size = len(char_matrix[0]) reward_spec = {(1, grid_size - 2): 1} builder = emdp.gridworld.builder_tools.TransitionMatrixBuilder( grid_size=grid_size, has_terminal_state=False) R = create_reward_matrix(builder.P.shape[0], builder.grid_size, reward_spec, action_space=builder.P.shape[1]) walls, empty_, bomb = ascii_to_walls(char_matrix) # hacks empty = [] for e in empty_: (e, ), = flatten_state(e, grid_size, grid_size * grid_size).nonzero() # R[e, :] = -1 empty.append(e) builder.add_grid(p_success=1, terminal_states=[(1, grid_size - 2)]) for (r, c) in walls: builder.add_wall_at((r, c)) P = builder.P for (r, c) in bomb: idx = grid_size * r + c R[idx, 0] = -100 # left R[idx, 1] = -100 # rigth R[idx, 2] = -100 # up R[idx, 3] = -100 # down # R[idx, 1] = -100 P[idx, :, :] = 0 P[idx, :, 8] = 1 p0 = np.zeros(R.shape[0]) p0[flatten_state((1, 1), grid_size, grid_size * grid_size).nonzero()] = 1. # p0[empty] = 1 / len(empty) gw = GridWorldMDP(P, R, gamma, p0, terminal_states=[(1, grid_size - 2)], size=builder.grid_size) return gw
def four_states_gw(goal): builder = emdp.gridworld.builder_tools.TransitionMatrixBuilder( grid_size=3, has_terminal_state=False) builder.add_grid([], p_success=1) builder.add_wall_at((0, 0)) builder.add_wall_at((0, 1)) builder.add_wall_at((0, 2)) builder.add_wall_at((1, 0)) builder.add_wall_at((1, 2)) reward_spec = {goal: +1} R = create_reward_matrix(builder.P.shape[0], builder.grid_size, reward_spec, action_space=builder.P.shape[1]) # target_state = pos_to_state(builder, (2, 2)) p0 = flatten_state((1, 1), builder.grid_size, R.shape[0]) # p0[[2, 4, 5, 8]] = .25 gw = GridWorldMDP(builder.P, R, 0.9, p0, terminal_states=(), size=builder.grid_size) return gw
def four_rooms_gw(gamma): ascii_room = """ ########## # ! ! # # ! # !# # ! # ### ! # # !! # # !! !# # ! ! ! # # ! # ##########"""[1:].split('\n') ascii_room = [row.strip() for row in ascii_room] char_matrix = get_char_matrix(ascii_room) grid_size = len(char_matrix[0]) reward_spec = {(grid_size - 2, grid_size - 2): +1} builder = emdp.gridworld.builder_tools.TransitionMatrixBuilder( grid_size=grid_size, has_terminal_state=False) walls, empty_, bomb = ascii_to_walls(char_matrix) # hacks empty = [] for e in empty_: (e, ), = flatten_state(e, grid_size, grid_size * grid_size).nonzero() empty.append(e) builder.add_grid(p_success=1, terminal_states=[(grid_size - 2, grid_size - 2)]) for (r, c) in walls: builder.add_wall_at((r, c)) for r, c in bomb: builder.add_wall_at((r, c)) R = create_reward_matrix(builder.P.shape[0], builder.grid_size, reward_spec, action_space=builder.P.shape[1]) #for b in bomb: # idx = grid_size * b[0] + b[1] # R[idx, :] = -10 p0 = np.zeros(R.shape[0]) p0[grid_size + 1] = 1 gw = GridWorldMDP(builder.P, R, gamma, p0, terminal_states=[(grid_size - 2, grid_size - 2)], size=builder.grid_size) return gw
def pos_to_state(builder, pos): target_state = flatten_state(pos, builder.grid_size, builder.state_space) target_state = target_state.argmax() return target_state