예제 #1
0
    def __init__(self, *, env: SokobanEnvFast, c: float, max_depth: int,
                 number_of_simulations: int):
        """Store the search settings and create the root node of the tree.

        All arguments are keyword-only.

        Args:
            env: Sokoban environment. It is reset here, and its full state
                right after the reset becomes the state of the root node.
            c: Stored as ``self.c`` (presumably the exploration constant of
                the selection rule -- confirm against the code that reads it).
            max_depth: Stored as ``self.max_depth``.
            number_of_simulations: Stored as ``self.number_of_simulations``.
        """
        self.env = env
        self.c = c
        self.max_depth = max_depth
        self.number_of_simulations = number_of_simulations

        # The same state can appear in several tree nodes, so all explored
        # states are kept in one shared dictionary.
        self.explored_states2state_node = {}

        env.reset()
        root_state = env.clone_full_state()
        root_state_node = self.get_state_node(root_state)
        # For now we assume that the seed is fixed.
        self.root_node = TreeNode(root_state_node, 0)
예제 #2
0
def check_solution(actions, seed=0, size=8, num_boxes=2):
    """Replay ``actions`` on a freshly reset room and report success.

    A ``SokobanEnvFast`` is built with ``dim_room=(size, size)``, the given
    ``num_boxes`` and ``seed``, and ``penalty_for_step=0``. Every action is
    then passed to ``env.step`` in order; stepping does not stop early when
    the environment reports ``done``.

    Args:
        actions: Iterable of actions accepted by ``env.step``.
        seed: Seed forwarded to the environment.
        size: Side length of the square room.
        num_boxes: Number of boxes forwarded to the environment.

    Returns:
        1 if the reward of the last step is at least 10, otherwise 0. An
        empty ``actions`` sequence yields 0 instead of raising
        ``IndexError``.
    """
    env = SokobanEnvFast(dim_room=(size, size),
                         num_boxes=num_boxes,
                         seed=seed,
                         penalty_for_step=0)
    env.reset()

    # Only the final reward decides the outcome, so the full reward history
    # is not kept. ``None`` means that no step was taken at all.
    last_reward = None
    for action in actions:
        _, last_reward, _, _ = env.step(action)

    if last_reward is None:
        return 0
    return 1 if last_reward >= 10 else 0
예제 #3
0
def find_solution(size=8, num_boxes=2, time_limit=10, seed=0):
    """Solve a generated Sokoban room with the Prolog solver and replay it.

    The initial board of a ``SokobanEnvFast`` room is translated into Prolog
    terms, ``sokoban.pl`` is queried through ``call_with_time_limit``, and
    the returned solutions are replayed on the environment one after another.

    Args:
        size: Side length of the square room.
        num_boxes: Number of boxes in the room.
        time_limit: Limit passed to Prolog's ``call_with_time_limit``.
        seed: Seed forwarded to the environment.

    Returns:
        ``(1, actions)`` for the first replayed solution whose most recent
        step reward is at least 10. ``(0, [])`` if there is no such solution
        or if querying or replaying raises an ``Exception``
        (``KeyboardInterrupt`` and ``SystemExit`` are no longer swallowed).

    Raises:
        ValueError: If no cell of the initial board is marked as the player.
    """
    dim_room = (size, size)
    rows, cols = dim_room

    env = SokobanEnvFast(dim_room=dim_room,
                         num_boxes=num_boxes,
                         seed=seed,
                         penalty_for_step=0)
    # The encoding of the board is described in README
    board = env.reset()

    def cell_terms(mask, functor):
        # One Prolog term per cell whose mask value is exactly 1.
        return ["{}(x{}y{})".format(functor, i, j)
                for i in range(rows)
                for j in range(cols)
                if mask[i, j] == 1]

    wall = board[:, :, 0]  # this is a one-hot encoding of walls
    # For readability first we deal with tops and then with rights.
    # A "top" term links cells (i, j) and (i, j + 1) when neither is a wall.
    tops = ["top(x{}y{},x{}y{})".format(i, j, i, j + 1)
            for i in range(rows)
            for j in range(cols - 1)
            if wall[i, j] == 0 and wall[i, j + 1] == 0]
    # A "right" term links cells (i, j) and (i + 1, j) when neither is a wall.
    rights = ["right(x{}y{},x{}y{})".format(i, j, i + 1, j)
              for i in range(rows - 1)
              for j in range(cols)
              if wall[i, j] == 0 and wall[i + 1, j] == 0]

    # Channel 4 is read as the initial box positions.
    boxes_initial = cell_terms(board[:, :, 4], "box")
    # Channels 2, 3 and 6 together are read as the target positions.
    boxes_target = cell_terms(
        board[:, :, 2] + board[:, :, 3] + board[:, :, 6], "solution")

    # Channels 5 and 6 together mark the player. Take the first marked cell
    # and fail loudly if there is none, instead of hitting an unbound local.
    player_terms = cell_terms(board[:, :, 5] + board[:, :, 6], "sokoban")
    if not player_terms:
        raise ValueError("initial board contains no player cell")
    sokoban_string = player_terms[0]

    tops_string = "[" + ','.join(tops) + ']'
    rights_string = "[" + ','.join(rights) + ']'
    boxes_initial_string = "[" + ','.join(boxes_initial) + ']'
    boxes_target_string = "[" + ','.join(boxes_target) + ']'

    prolog = Prolog()
    prolog.consult("sokoban.pl")
    query = "call_with_time_limit({},solve([{},{},{},{},{}],Solution))".format(
        time_limit, tops_string, rights_string, boxes_initial_string,
        boxes_target_string, sokoban_string)

    print(query)
    try:
        results = list(prolog.query(query))
        # NOTE(review): the reward list is shared by all solutions and the
        # environment is not reset between replays -- confirm that the
        # solver is expected to return at most one solution.
        rewards = []
        for result in results:
            actions = []
            for step in result['Solution']:
                # The move name is the last whitespace-separated token of
                # the term's string form without its final character.
                move = str(step).split()[-1][:-1]
                action = map_moves(move)
                actions.append(action)
                _, reward, _, _ = env.step(action)
                rewards.append(reward)
            print("Last return {}".format(rewards[-1]))
            if rewards[-1] >= 10:
                return 1, actions
        return 0, []
    except Exception:
        # Best effort: a solver failure (such as an exceeded time limit) or
        # a replay failure is reported as "no solution".
        return 0, []
예제 #4
0
def find_solution(size=8, num_boxes=2, time_limit=10):
    """Query the Prolog solver for a room and trace the replay on stdout.

    Debug-oriented variant: the room is always built with ``seed=0``, the
    generated Prolog terms, the query, every move and every intermediate
    board are printed, and nothing is returned.

    Args:
        size: Side length of the square room.
        num_boxes: Number of boxes in the room.
        time_limit: Limit passed to Prolog's ``call_with_time_limit``.

    Returns:
        None.
    """
    dim_room = (size, size)
    n_rows, n_cols = dim_room

    env = SokobanEnvFast(dim_room=dim_room, num_boxes=num_boxes, seed=0)
    # The encoding of the board is described in README
    board = env.reset()

    def layered_view(grid):
        # Collapse the channels into one 2-D array for printing; each channel
        # is weighted by its index and channel 1 is left out.
        return (grid[:, :, 0] + 2 * grid[:, :, 2] + 3 * grid[:, :, 3] +
                4 * grid[:, :, 4] + 5 * grid[:, :, 5] + 6 * grid[:, :, 6])

    def marked_cells(mask, template):
        # Format ``template`` with (row, column) of every cell equal to 1.
        return [template.format(r, c)
                for r in range(n_rows)
                for c in range(n_cols)
                if mask[r, c] == 1]

    wall = board[:, :, 0]  # this is a one-hot encoding of walls
    # For readability the "top" terms are built first, then the "right" ones.
    tops = ["top(x{}y{},x{}y{})".format(r, c, r, c + 1)
            for r in range(n_rows)
            for c in range(n_cols - 1)
            if wall[r, c] == 0 and wall[r, c + 1] == 0]
    rights = ["right(x{}y{},x{}y{})".format(r, c, r + 1, c)
              for r in range(n_rows - 1)
              for c in range(n_cols)
              if wall[r, c] == 0 and wall[r + 1, c] == 0]

    # Channel 4 is read as the initial boxes, channel 2 as the targets.
    boxes_initial = marked_cells(board[:, :, 4], "box(x{}y{})")
    boxes_target = marked_cells(board[:, :, 2], "solution(x{}y{})")

    # Channels 5 and 6 together mark the player. Every row is scanned and
    # the first marked cell of the last matching row is kept.
    player_mask = board[:, :, 5] + board[:, :, 6]
    for r in range(n_rows):
        hits = [c for c in range(n_cols) if player_mask[r, c] == 1]
        if hits:
            sokoban_string = "sokoban(x{}y{})".format(r, hits[0])

    tops_string = "[" + ','.join(tops) + ']'
    rights_string = "[" + ','.join(rights) + ']'
    boxes_initial_string = "[" + ','.join(boxes_initial) + ']'
    boxes_target_string = "[" + ','.join(boxes_target) + ']'

    print("Tops {}".format(tops_string))
    print("Rights {}".format(rights_string))
    print("Boxes initial locations {}".format(boxes_initial_string))
    print("Boxes target locations {}".format(boxes_target_string))
    print("Sokoban initial location {}".format(sokoban_string))

    prolog = Prolog()
    prolog.consult("sokoban.pl")
    query = "call_with_time_limit({},solve([{},{},{},{},{}],Solution))".format(
        time_limit, tops_string, rights_string, boxes_initial_string,
        boxes_target_string, sokoban_string)

    print(query)
    result = list(prolog.query(query))
    # The reward history is shared by all returned solutions.
    rewards = []

    print(layered_view(board))
    for number, answer in enumerate(result):
        solution = answer['Solution']
        actions = []
        print("Solution {}: {}".format(number, solution))
        for step in solution:
            print("Solution at index {}".format(step))
            # The last whitespace-separated token of the term's string form
            # carries the move name followed by one trailing character.
            raw_move = str(step).split()[-1]
            print("Move {}".format(raw_move))
            move = raw_move[:-1]
            action = map_moves(move)
            print("Move {} Action {}".format(move, action))
            actions.append(action)
            board, reward, *_ = env.step(action)
            print(layered_view(board))
            rewards.append(reward)
        print("Actions: {}".format(actions))
        print("Rewards: {}".format(rewards))
예제 #5
0
from mcts import SokoMCTS
from gym_sokoban.envs.sokoban_env_fast import SokobanEnvFast
from PIL import Image

# Build a 10x10 room with 4 boxes, using an explicit seed.
env = SokobanEnvFast(dim_room=(10, 10), num_boxes=4, seed=3)

# Reset the environment, then render the room as an RGB array and open it
# as an image.
env.reset()
Image.fromarray(env.render(mode="rgb_array")).show()

# Set up the tree search on this environment and run it for 1000 passes.
mcts = SokoMCTS(env=env, c=1, max_depth=5000, number_of_simulations=4)
mcts.run(passes=1000, verbose=1)

# Fetch the graph built by the search and display it.
# NOTE(review): `view(cleanup=True)` looks like the graphviz API -- confirm
# what `get_graph` returns.
graph = mcts.get_graph()
graph.view(cleanup=True)
예제 #6
0
def find_solution(size=8, num_boxes=2, time_limit=10, seed=0, verbose=1):
    """Solve a room with the Prolog solver, replay it and render videos.

    The initial board of a ``SokobanEnvFast`` room is translated into Prolog
    terms and the solver is queried through ``call_with_time_limit``. For
    SWI-Prolog major versions below 8, ``sokoban_swipl7.pl`` is consulted
    and ``library(time)`` is loaded as part of the query; otherwise
    ``sokoban.pl`` is consulted. Every returned solution is replayed on the
    environment and its frames are handed to ``render_video``.

    Args:
        size: Side length of the square room.
        num_boxes: Number of boxes in the room.
        time_limit: Limit passed to Prolog's ``call_with_time_limit``.
        seed: Seed forwarded to the environment; also part of the video name.
        verbose: When truthy, print the compatibility warning, the query and
            the last reward.

    Returns:
        ``(1, actions)`` if the most recent step reward after all replays is
        at least 10, where ``actions`` belongs to the last replayed solution.
        ``(0, [])`` if nothing was replayed, the reward is lower, or querying
        or replaying raises an ``Exception`` (``KeyboardInterrupt`` and
        ``SystemExit`` are no longer swallowed).

    Raises:
        ValueError: If no cell of the initial board is marked as the player.
    """
    dim_room = (size, size)
    rows, cols = dim_room

    env = SokobanEnvFast(dim_room=dim_room,
                         num_boxes=num_boxes,
                         seed=seed,
                         penalty_for_step=0)
    # The encoding of the board is described in README
    board = env.reset()

    def cell_terms(mask, functor):
        # One Prolog term per cell whose mask value is exactly 1.
        return ["{}(x{}y{})".format(functor, i, j)
                for i in range(rows)
                for j in range(cols)
                if mask[i, j] == 1]

    wall = board[:, :, 0]  # this is a one-hot encoding of walls
    # For readability first we deal with tops and then with rights.
    # A "top" term links cells (i, j) and (i, j + 1) when neither is a wall.
    tops = ["top(x{}y{},x{}y{})".format(i, j, i, j + 1)
            for i in range(rows)
            for j in range(cols - 1)
            if wall[i, j] == 0 and wall[i, j + 1] == 0]
    # A "right" term links cells (i, j) and (i + 1, j) when neither is a wall.
    rights = ["right(x{}y{},x{}y{})".format(i, j, i + 1, j)
              for i in range(rows - 1)
              for j in range(cols)
              if wall[i, j] == 0 and wall[i + 1, j] == 0]

    # Channel 4 is read as the initial boxes, channel 2 as the targets.
    boxes_initial = cell_terms(board[:, :, 4], "box")
    boxes_target = cell_terms(board[:, :, 2], "solution")

    # Channels 5 and 6 together mark the player. Take the first marked cell
    # and fail loudly if there is none, instead of hitting an unbound local.
    player_terms = cell_terms(board[:, :, 5] + board[:, :, 6], "sokoban")
    if not player_terms:
        raise ValueError("initial board contains no player cell")
    sokoban_string = player_terms[0]

    tops_string = "[" + ','.join(tops) + ']'
    rights_string = "[" + ','.join(rights) + ']'
    boxes_initial_string = "[" + ','.join(boxes_initial) + ']'
    boxes_target_string = "[" + ','.join(boxes_target) + ']'

    legacy_swipl = swipl_major_version < 8

    prolog = Prolog()
    if legacy_swipl:
        if verbose:
            print(
                "Warning: using sokoban_swipl7.pl for compatibility with SWI-Prolog version 7"
            )
        prolog.consult("sokoban_swipl7.pl")
    else:
        prolog.consult("sokoban.pl")

    query = "call_with_time_limit({},solve([{},{},{},{},{}],Solution))".format(
        time_limit, tops_string, rights_string, boxes_initial_string,
        boxes_target_string, sokoban_string)
    if legacy_swipl:
        # Older versions need library(time) loaded explicitly for
        # call_with_time_limit.
        query = "use_module(library(time))," + query
    if verbose:
        print(query)

    try:
        results = list(prolog.query(query))
        # NOTE(review): the reward list is shared by all solutions and the
        # environment is not reset between replays -- confirm that the
        # solver is expected to return at most one solution.
        rewards = []
        actions = []
        for i, result in enumerate(results):
            actions = []
            frames = []
            for step in result['Solution']:
                # The move name is the last whitespace-separated token of
                # the term's string form without its final character.
                move = str(step).split()[-1][:-1]
                action = map_moves(move)
                actions.append(action)
                _, reward, _, _ = env.step(action)
                arr = scale(env.render(mode="rgb_array"), 4)
                frames.append(Image.fromarray(arr))
                rewards.append(reward)

            render_video(frames, f"{seed}_{i}_size{size}x{size}")

        # No solution, or only empty ones: nothing was replayed. This used
        # to be reported through an IndexError caught by the bare except.
        if not rewards:
            return 0, []

        if verbose:
            print("Last return {}".format(rewards[-1]))
        if rewards[-1] >= 10:
            return 1, actions
        return 0, []
    except Exception:
        # Best effort: a solver failure (such as an exceeded time limit) or
        # a replay/render failure is reported as "no solution".
        return 0, []