def __init__(self, *, env: SokobanEnvFast, c: float, max_depth: int, number_of_simulations: int):
    """Store the search settings and build the root node from a reset env.

    All arguments are keyword-only.

    Args:
        env: environment the search operates on. It is reset here, and the
            full state cloned right after the reset becomes the root state.
        c: kept as ``self.c`` (presumably the exploration constant used
            when selecting nodes -- confirm against the selection code).
        max_depth: kept as ``self.max_depth``.
        number_of_simulations: kept as ``self.number_of_simulations``.
    """
    # Search settings are stored verbatim.
    self.env, self.c = env, c
    self.max_depth, self.number_of_simulations = max_depth, number_of_simulations

    # A state can appear in several tree nodes, so every explored state is
    # remembered in a single mapping.
    self.explored_states2state_node = {}

    # For now we assume that the seed is fixed.
    env.reset()
    root_state = env.clone_full_state()
    self.root_node = TreeNode(self.get_state_node(root_state), 0)
def check_solution(actions, seed=0, size=8, num_boxes=2):
    """Replay ``actions`` on a freshly generated board and score the outcome.

    Args:
        actions: iterable of actions, each passed to ``env.step``.
        seed: seed forwarded to ``SokobanEnvFast``.
        size: side length of the square room.
        num_boxes: number of boxes, forwarded to ``SokobanEnvFast``.

    Returns:
        1 if the reward of the final step is at least 10, otherwise 0.
        An empty ``actions`` sequence yields 0.
    """
    env = SokobanEnvFast(dim_room=(size, size), num_boxes=num_boxes, seed=seed, penalty_for_step=0)
    env.reset()

    # Only the reward of the last step matters, so there is no need to keep
    # the whole history.
    last_reward = None
    for action in actions:
        _, last_reward, _, _ = env.step(action)

    # With no actions there is no final reward to inspect; the previous
    # implementation crashed with IndexError here.
    if last_reward is None:
        return 0
    # NOTE(review): 10 is presumably the bonus granted for finishing the
    # puzzle -- confirm against the environment's reward settings.
    return 1 if last_reward >= 10 else 0
def find_solution(size=8, num_boxes=2, time_limit=10, seed=0):
    """Ask the Prolog solver for a plan and replay it on the environment.

    A board is generated from ``seed``, turned into Prolog facts (walkable
    neighbour pairs, box cells, target cells, player cell) and passed to
    ``solve/2`` from ``sokoban.pl`` wrapped in ``call_with_time_limit``.
    Every move of a returned solution is mapped with ``map_moves`` and
    stepped through the environment.

    Args:
        size: side length of the square room.
        num_boxes: number of boxes, forwarded to ``SokobanEnvFast``.
        time_limit: first argument of ``call_with_time_limit``.
        seed: seed forwarded to ``SokobanEnvFast``.

    Returns:
        ``(1, actions)`` if the last replayed step earns a reward of at
        least 10; ``(0, [])`` if no solution was found, the replay did not
        reach that reward, or the solver/replay raised an exception.

    Raises:
        ValueError: if no player cell is present on the initial board.
    """
    dim_room = (size, size)
    env = SokobanEnvFast(dim_room=dim_room, num_boxes=num_boxes, seed=seed, penalty_for_step=0)
    # The encoding of the board is described in README
    board = env.reset()
    n_rows, n_cols = dim_room

    def marked_cells(layer):
        """Return the (i, j) positions where ``layer`` equals 1, row by row."""
        return [(i, j) for i in range(n_rows) for j in range(n_cols) if layer[i, j] == 1]

    wall = board[:, :, 0]  # this is a one-hot encoding of walls
    # For readability first we deal with tops and then with rights
    tops = [
        "top(x{}y{},x{}y{})".format(i, j, i, j + 1)
        for i in range(n_rows)
        for j in range(n_cols - 1)
        if wall[i, j] == 0 and wall[i, j + 1] == 0
    ]
    rights = [
        "right(x{}y{},x{}y{})".format(i, j, i + 1, j)
        for i in range(n_rows - 1)
        for j in range(n_cols)
        if wall[i, j] == 0 and wall[i + 1, j] == 0
    ]
    boxes_initial = ["box(x{}y{})".format(i, j) for i, j in marked_cells(board[:, :, 4])]
    boxes_target = [
        "solution(x{}y{})".format(i, j)
        for i, j in marked_cells(board[:, :, 2] + board[:, :, 3] + board[:, :, 6])
    ]

    # Fail loudly instead of hitting an unbound variable further down.
    player_cells = marked_cells(board[:, :, 5] + board[:, :, 6])
    if not player_cells:
        raise ValueError("no player cell found on the initial board")
    sokoban_string = "sokoban(x{}y{})".format(*player_cells[0])

    tops_string = "[" + ','.join(tops) + ']'
    rights_string = "[" + ','.join(rights) + ']'
    boxes_initial_string = "[" + ','.join(boxes_initial) + ']'
    boxes_target_string = "[" + ','.join(boxes_target) + ']'

    prolog = Prolog()
    prolog.consult("sokoban.pl")
    query = "call_with_time_limit({},solve([{},{},{},{},{}],Solution))".format(
        time_limit, tops_string, rights_string, boxes_initial_string,
        boxes_target_string, sokoban_string)
    print(query)

    try:
        result = list(prolog.query(query))
        rewards = []
        # NOTE(review): the environment is not reset between solutions, so a
        # second solution would be replayed from the end state of the first.
        for r in result:
            solution = r['Solution']
            actions = []
            for step in solution:
                # The move is the last whitespace-separated token of the
                # term's text form with its final character stripped.
                move = str(step).split()[-1][:-1]
                action = map_moves(move)
                actions.append(action)
                _, reward, _, _ = env.step(action)
                rewards.append(reward)
            if not rewards:
                # Nothing was replayed, so there is no reward to judge.
                return 0, []
            print("Last return {}".format(rewards[-1]))
            if rewards[-1] >= 10:
                return 1, actions
    except Exception:
        # Best effort: solver errors (e.g. an exceeded time limit) and replay
        # failures count as "not solved". KeyboardInterrupt and SystemExit
        # are deliberately allowed to propagate.
        return 0, []
    return 0, []
def find_solution(size=8, num_boxes=2, time_limit=10, seed=0):
    """Debug variant: solve a board with Prolog and print every replay step.

    The board is turned into Prolog facts, ``solve/2`` from ``sokoban.pl``
    is queried under ``call_with_time_limit``, and each returned move is
    replayed on the environment while the facts, the query, the moves, the
    boards and the collected rewards are printed.

    Args:
        size: side length of the square room.
        num_boxes: number of boxes, forwarded to ``SokobanEnvFast``.
        time_limit: first argument of ``call_with_time_limit``.
        seed: seed forwarded to ``SokobanEnvFast`` (defaults to the value
            that used to be hard-coded).

    Returns:
        None. All results are printed.

    Raises:
        ValueError: if no player cell is present on the initial board.
        Any exception raised by the Prolog query or the replay propagates
        unchanged.
    """
    dim_room = (size, size)
    env = SokobanEnvFast(dim_room=dim_room, num_boxes=num_boxes, seed=seed)
    # The encoding of the board is described in README
    board = env.reset()
    n_rows, n_cols = dim_room

    def marked_cells(layer):
        """Return the (i, j) positions where ``layer`` equals 1, row by row."""
        return [(i, j) for i in range(n_rows) for j in range(n_cols) if layer[i, j] == 1]

    def flat_view(state):
        """Collapse the board layers into one array, weighting each by its index."""
        return (state[:, :, 0] + 2 * state[:, :, 2] + 3 * state[:, :, 3]
                + 4 * state[:, :, 4] + 5 * state[:, :, 5] + 6 * state[:, :, 6])

    wall = board[:, :, 0]  # this is a one-hot encoding of walls
    # For readability first we deal with tops and then with rights
    tops = [
        "top(x{}y{},x{}y{})".format(i, j, i, j + 1)
        for i in range(n_rows)
        for j in range(n_cols - 1)
        if wall[i, j] == 0 and wall[i, j + 1] == 0
    ]
    rights = [
        "right(x{}y{},x{}y{})".format(i, j, i + 1, j)
        for i in range(n_rows - 1)
        for j in range(n_cols)
        if wall[i, j] == 0 and wall[i + 1, j] == 0
    ]
    boxes_initial = ["box(x{}y{})".format(i, j) for i, j in marked_cells(board[:, :, 4])]
    boxes_target = ["solution(x{}y{})".format(i, j) for i, j in marked_cells(board[:, :, 2])]

    # Fail loudly instead of hitting an unbound variable further down.
    player_cells = marked_cells(board[:, :, 5] + board[:, :, 6])
    if not player_cells:
        raise ValueError("no player cell found on the initial board")
    sokoban_string = "sokoban(x{}y{})".format(*player_cells[0])

    tops_string = "[" + ','.join(tops) + ']'
    rights_string = "[" + ','.join(rights) + ']'
    boxes_initial_string = "[" + ','.join(boxes_initial) + ']'
    boxes_target_string = "[" + ','.join(boxes_target) + ']'

    print("Tops {}".format(tops_string))
    print("Rights {}".format(rights_string))
    print("Boxes initial locations {}".format(boxes_initial_string))
    print("Boxes target locations {}".format(boxes_target_string))
    print("Sokoban initial location {}".format(sokoban_string))

    prolog = Prolog()
    prolog.consult("sokoban.pl")
    query = "call_with_time_limit({},solve([{},{},{},{},{}],Solution))".format(
        time_limit, tops_string, rights_string, boxes_initial_string,
        boxes_target_string, sokoban_string)
    print(query)

    result = list(prolog.query(query))
    rewards = []
    print(flat_view(board))
    for i, r in enumerate(result):
        solution = r['Solution']
        actions = []
        print("Solution {}: {}".format(i, solution))
        for step in solution:
            print("Solution at index {}".format(step))
            # The move is the last whitespace-separated token of the term's
            # text form; its final character is stripped before mapping.
            move = str(step).split()[-1]
            print("Move {}".format(move))
            move = move[:-1]
            action = map_moves(move)
            print("Move {} Action {}".format(move, action))
            actions.append(action)
            board, reward = env.step(action)[:2]
            print(flat_view(board))
            rewards.append(reward)
        # Reported per solution so that ``actions`` is always defined here.
        print("Actions: {}".format(actions))
        print("Rewards: {}".format(rewards))
from mcts import SokoMCTS
from gym_sokoban.envs.sokoban_env_fast import SokobanEnvFast
from PIL import Image

# Demo: build a 10x10 room with 4 boxes, display it, then run the search.
env = SokobanEnvFast(dim_room=(10, 10), num_boxes=4, seed=3)

# Show room
env.reset()
room_pixels = env.render(mode="rgb_array")
room_image = Image.fromarray(room_pixels)
room_image.show()

# Run the search for 1000 passes with verbose output enabled.
mcts = SokoMCTS(env=env, c=1, max_depth=5000, number_of_simulations=4)
mcts.run(passes=1000, verbose=1)

# Fetch the graph built by the search and open it in a viewer.
graph = mcts.get_graph()
graph.view(cleanup=True)
def find_solution(size=8, num_boxes=2, time_limit=10, seed=0, verbose=1):
    """Solve a board with Prolog, replay the plan and render it to a video.

    The board is turned into Prolog facts and ``solve/2`` is queried under
    ``call_with_time_limit``. For SWI-Prolog major versions below 8 the
    ``sokoban_swipl7.pl`` program is consulted and ``library(time)`` is
    loaded as part of the query. The first returned solution is replayed
    on the environment; each step is rendered, scaled by 4 and collected
    as a frame, and the frames are passed to ``render_video``.

    Args:
        size: side length of the square room.
        num_boxes: number of boxes, forwarded to ``SokobanEnvFast``.
        time_limit: first argument of ``call_with_time_limit``.
        seed: seed forwarded to ``SokobanEnvFast``; also part of the video
            name.
        verbose: when truthy, print the compatibility warning, the query
            and the last reward.

    Returns:
        ``(1, actions)`` if the last replayed step earns a reward of at
        least 10; ``(0, [])`` otherwise, including when Prolog returns no
        solution (previously this case returned ``None``) or when the
        solver, the replay or the rendering raises an exception.

    Raises:
        ValueError: if no player cell is present on the initial board.
    """
    dim_room = (size, size)
    env = SokobanEnvFast(dim_room=dim_room, num_boxes=num_boxes, seed=seed, penalty_for_step=0)
    # The encoding of the board is described in README
    board = env.reset()
    n_rows, n_cols = dim_room

    def marked_cells(layer):
        """Return the (i, j) positions where ``layer`` equals 1, row by row."""
        return [(i, j) for i in range(n_rows) for j in range(n_cols) if layer[i, j] == 1]

    wall = board[:, :, 0]  # this is a one-hot encoding of walls
    # For readability first we deal with tops and then with rights
    tops = [
        "top(x{}y{},x{}y{})".format(i, j, i, j + 1)
        for i in range(n_rows)
        for j in range(n_cols - 1)
        if wall[i, j] == 0 and wall[i, j + 1] == 0
    ]
    rights = [
        "right(x{}y{},x{}y{})".format(i, j, i + 1, j)
        for i in range(n_rows - 1)
        for j in range(n_cols)
        if wall[i, j] == 0 and wall[i + 1, j] == 0
    ]
    boxes_initial = ["box(x{}y{})".format(i, j) for i, j in marked_cells(board[:, :, 4])]
    boxes_target = ["solution(x{}y{})".format(i, j) for i, j in marked_cells(board[:, :, 2])]

    # Fail loudly instead of hitting an unbound variable further down.
    player_cells = marked_cells(board[:, :, 5] + board[:, :, 6])
    if not player_cells:
        raise ValueError("no player cell found on the initial board")
    sokoban_string = "sokoban(x{}y{})".format(*player_cells[0])

    tops_string = "[" + ','.join(tops) + ']'
    rights_string = "[" + ','.join(rights) + ']'
    boxes_initial_string = "[" + ','.join(boxes_initial) + ']'
    boxes_target_string = "[" + ','.join(boxes_target) + ']'

    prolog = Prolog()
    legacy_swipl = swipl_major_version < 8
    if legacy_swipl:
        if verbose:
            print(
                "Warning: using sokoban_swipl7.pl for compatibility with SWI-Prolog version 7"
            )
        prolog.consult("sokoban_swipl7.pl")
    else:
        prolog.consult("sokoban.pl")

    query = "call_with_time_limit({},solve([{},{},{},{},{}],Solution))".format(
        time_limit, tops_string, rights_string, boxes_initial_string,
        boxes_target_string, sokoban_string)
    if legacy_swipl:
        query = "use_module(library(time))," + query
    if verbose:
        print(query)

    try:
        result = list(prolog.query(query))
        if not result:
            # The query failed without raising: report "not solved" instead
            # of falling off the end of the function.
            return 0, []

        # Only the first solution is ever replayed.
        solution = result[0]['Solution']
        actions = []
        frames = []
        rewards = []
        for step in solution:
            # The move is the last whitespace-separated token of the term's
            # text form with its final character stripped.
            move = str(step).split()[-1][:-1]
            action = map_moves(move)
            actions.append(action)
            _, reward, _, _ = env.step(action)
            arr = scale(env.render(mode="rgb_array"), 4)
            frames.append(Image.fromarray(arr))
            rewards.append(reward)

        if not rewards:
            # An empty plan leaves nothing to render or to judge.
            return 0, []

        render_video(frames, f"{seed}_0_size{size}x{size}")
        if verbose:
            print("Last return {}".format(rewards[-1]))
        if rewards[-1] >= 10:
            return 1, actions
        return 0, []
    except Exception:
        # Best effort: solver errors (e.g. an exceeded time limit), replay
        # and rendering failures count as "not solved". KeyboardInterrupt
        # and SystemExit are deliberately allowed to propagate.
        return 0, []