def test_winner_correct_when_board_full():
    """Check that a winner is reported only after every layer is filled.

    A fresh game must have no winner; after ``fill_layer`` has been called
    for layers 0 through 3 the game is expected to report player 1.
    """
    game = Pylos()

    # No moves have been played yet, so nobody can have won.
    assert_equal(game.get_winner(), None)

    # Fill the layers in ascending index order (0, 1, 2, 3).
    for layer_index in range(4):
        fill_layer(game, layer_index)

    assert_equal(game.get_winner(), 1)
def test_winner_correct_when_current_player_no_balls():
    """Check the winner once the player to move has an empty reserve.

    The game is driven to a position where ``game.reserve`` is ``[1, 0]``
    and ``game.current_player`` is 1; the test then expects both
    ``determine_winner()`` and ``get_winner()`` to report player 0.
    """
    game = Pylos()

    assert game.get_winner() is None

    # Scripted opening. Each entry holds the positional arguments handed to
    # play_turn after (game, None). Coordinates look like (layer, row, column)
    # and the extra tuples in the seventh entry presumably name balls to take
    # back -- TODO confirm against play_turn.
    opening_turns = (
        ((0, 0, 0),),
        ((0, 3, 0),),
        ((0, 0, 1),),
        ((0, 3, 1),),
        ((0, 1, 0),),
        ((0, 3, 2),),
        ((0, 1, 1), (0, 1, 0), (0, 1, 1)),
        ((0, 3, 3),),
        ((0, 0, 2),),
        ((0, 0, 3),),
    )
    for turn_args in opening_turns:
        play_turn(game, None, *turn_args)

    assert_equal(game.render(), "0001/..../..../1111#.../.../...#../..#.")

    # Fill the two still-empty rows (indices 1 and 2), one cell per turn,
    # left to right.
    for row in (1, 2):
        for column in range(4):
            play_turn(game, None, (0, row, column))

    assert_equal(game.reserve, [8, 6])

    assert_equal(game.render(), "0001/0101/0101/1111#.../.../...#../..#.")

    # Fill the next two layers.
    for layer_index in (1, 2):
        fill_layer(game, layer_index)

    # Player 1 is to move with nothing left in reserve, so player 0 wins.
    assert_equal(game.reserve, [1, 0])
    assert_equal(game.current_player, 1)
    assert_equal(game.render(), "0001/0101/0101/1111#010/101/010#10/10#.")
    assert_equal(game.determine_winner(), 0)
    assert_equal(game.get_winner(), 0)
# Example #3
# 0
class PylosEnv(gym.Env):
    """Gym environment that exposes a ``Pylos`` game one sub-move at a time.

    Every call to ``step`` supplies a single discrete action (0-30) that is
    translated into a location and applied to the underlying game for
    whichever player and phase the game is currently in.  Observations are
    flat 0/1 vectors produced by ``state_from_pylos``.
    """
    done: bool
    metadata = {}

    def __init__(self):
        """Create the environment with a fresh game and the action space."""
        self.pylos = Pylos()
        self.done = False

        self.reset()

        # each move consists of 31 possible values (0-29 meaning one of the board positions,
        # 30 meaning 'none' or 'reserve' depending on the current phase)
        # note that for the 'target' phase, 30 is never a valid value
        self.action_space = spaces.Discrete(31)

    def state_from_pylos(self):
        """Encode the current game as a flat vector of zeros and ones.

        Layout of the returned 1-D array:

        * 2 entries: ``[1 - current_player, current_player]`` (a one-hot
          encoding as long as ``current_player`` is 0 or 1),
        * 4 entries: one-hot of the game phase (source location, target
          location, retract 1, retract 2),
        * 3 entries per board cell, in ``flat_board`` order:
          (empty, owned by player 0, owned by player 1).
        """
        current = self.pylos.current_player
        player_onehot = np.array([1 - current, current])

        # game phase
        phases = (
            pylos.PHASE_SOURCE_LOCATION,
            pylos.PHASE_TARGET_LOCATION,
            pylos.PHASE_RETRACT1,
            pylos.PHASE_RETRACT2,
        )
        phase_onehot = np.array(
            [1 if self.pylos.phase == phase else 0 for phase in phases])

        # board (3 nodes per board position); flatten the board once and
        # reuse it for all three indicator rows.
        cells = self.flat_board()
        board_vector = np.array([
            [1 if ball_owner is None else 0 for ball_owner in cells],
            [1 if ball_owner == 0 else 0 for ball_owner in cells],
            [1 if ball_owner == 1 else 0 for ball_owner in cells],
        ]).flatten('F')  # column-major: the 3 indicators of a cell stay adjacent

        return np.concatenate([player_onehot, phase_onehot, board_vector])

    def flat_board(self):
        """Return every cell of ``self.pylos.layers`` as one flat list.

        Cells are listed layer by layer, row by row, in iteration order.
        """
        return [
            cell
            for layer in self.pylos.layers
            for row in layer
            for cell in row
        ]

    def reset(self):
        """Start a new game and return its initial observation."""
        self.done = False
        self.pylos = Pylos()
        return self.state_from_pylos()

    def step(self, action):
        """Apply one action and return ``(observation, reward, done, info)``.

        * After the episode is done: prints a warning and returns reward 0
          with ``info['warning_step_after_done']`` set; the game is untouched.
        * Invalid move: the game is untouched and the reward is -10
          (see ``create_invalid_move_step_response``).
        * Valid move: reward is 1 plus the change in the moving player's
          reserve, plus 10 if the game has a winner afterwards, in which
          case the episode is marked done.  ``info['phase']`` carries the
          phase after the move.
        """
        if self.done:
            print("WARNING: step called after done")
            return self.state_from_pylos(), 0, self.done, {
                'warning_step_after_done': True
            }

        location = map_action_to_location(action)

        if not self.pylos.is_valid_move(location):
            return self.create_invalid_move_step_response(location)

        # Remember who moved: current_player may differ after move().
        player = self.pylos.current_player
        reserve_before = self.pylos.reserve[player]
        self.pylos.move(location)
        reserve_after = self.pylos.reserve[player]

        winner = self.pylos.get_winner()
        reward = 1
        reward += reserve_after - reserve_before

        # NOTE(review): the bonus goes to the player who just moved even if
        # `winner` is the other player -- confirm this is intended.
        if winner is not None:
            reward += 10
            self.done = True

        return self.state_from_pylos(), reward, self.done, {
            'phase': self.pylos.phase
        }

    def render(self):
        """Print the winner (if any), player to move, phase and board."""
        if self.pylos.winner is not None:
            print(" *** Winner *** ", self.pylos.winner)
        print("Player: ", self.pylos.current_player)
        print("Phase: ", PHASE_NAMES[self.pylos.phase])
        print("\n".join([self._render_row(r) for r in range(4)]))

    def _render_row(self, row):
        """Return text row ``row`` of every layer, side by side.

        ``self.pylos.render()`` is split on ``#`` into layers and each layer
        on ``/`` into rows.  Layers that do not have row ``row`` contribute
        blanks as wide as that layer's own rows, so every layer keeps a
        constant column width across all printed lines.
        """
        split_layers = [
            layer.split("/") for layer in self.pylos.render().split("#")
        ]

        columns = []
        for layer in split_layers:
            if row < len(layer):
                columns.append(layer[row])
            else:
                # str.split always yields at least one element, so layer[0]
                # exists; its length is the width of this layer's rows.
                columns.append(" " * len(layer[0]))
        return "  ".join(columns)

    def create_invalid_move_step_response(self, location):
        """Build the ``step`` result for a rejected move (reward -10)."""
        return self.state_from_pylos(), -10, self.done, {'invalid': location}