Example #1
def test_spawning_random_boards():
    board0 = Board.init_random((100, 100), 5)
    assert np.argwhere(board0.as_array()).shape == (5, 2)

    board1 = Board.init_random((4, 4), 2)
    assert np.argwhere(board1.as_array()).shape == (2, 2)

    board2 = Board.init_random((5, 5), 0)
    assert np.argwhere(board2.as_array()).shape == (0, 2)
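The assertions above rely on `np.argwhere` returning one (row, col) index pair per nonzero cell, so a board with n spawned tiles yields an array of shape (n, 2). A minimal standalone sketch in plain numpy (independent of the project's `Board` class):

import numpy as np

# A 4x4 board with two nonzero tiles.
arr = np.zeros((4, 4), dtype=np.int32)
arr[0, 1] = 2
arr[2, 3] = 4

# One (row, col) pair per nonzero cell -> shape (2, 2).
assert np.argwhere(arr).shape == (2, 2)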
Example #2
def test_game_over_checking_works_as_expected():
    not_game_over_strings = ["1 1 1 1", "0 0 0 0", "0 1 0 0", "3 2 1 3 4 5 6 7 8"]
    for s in not_game_over_strings:
        board = board_from_string(s)
        assert not Board.is_game_over(board)

    game_over_strings = ["1 2 3 4", "1 2 3 4 5 6 7 8 9", "4 3 7 2"]
    for s in game_over_strings:
        board = board_from_string(s)
        assert Board.is_game_over(board)
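The board strings above imply the usual 2048 game-over rule: the game is over only when there are no empty (zero) cells and no equal horizontally or vertically adjacent tiles left to merge. A rough standalone sketch of that rule (hypothetical `is_game_over_sketch`, not the project's `Board.is_game_over`):

import numpy as np

def is_game_over_sketch(arr: np.ndarray) -> bool:
    if (arr == 0).any():
        return False  # an empty cell always leaves a legal move
    mergeable_rows = (arr[:, :-1] == arr[:, 1:]).any()
    mergeable_cols = (arr[:-1, :] == arr[1:, :]).any()
    return not (mergeable_rows or mergeable_cols)

assert not is_game_over_sketch(np.array([[1, 1], [1, 1]]))  # "1 1 1 1"
assert is_game_over_sketch(np.array([[1, 2], [3, 4]]))      # "1 2 3 4"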
Example #3
    def test_get_empty_positions(self):

        board = Board()

        board.move(PLAYER1, 2, 1)
        board.move(PLAYER2, 1, 0)

        emptyPositions = board.get_empty_positions()

        assert [1, 0] not in emptyPositions
        assert [2, 1] not in emptyPositions
Example #4
def test_get_neighbors():
    b = Board()
    assert set(b.get_neighbors(1, 1)) == {
        (0, 0),
        (0, 1),
        (0, 2),
        (1, 0),
        (1, 2),
        (2, 0),
        (2, 1),
        (2, 2),
    }
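The expected set above is the full 8-neighborhood of cell (1, 1). A minimal sketch of such a helper (hypothetical `neighbors_sketch`, assuming out-of-bounds coordinates are dropped rather than wrapped):

def neighbors_sketch(row, col, n_rows, n_cols):
    cells = []
    for dr in (-1, 0, 1):
        for dc in (-1, 0, 1):
            if dr == 0 and dc == 0:
                continue  # skip the cell itself
            r, c = row + dr, col + dc
            if 0 <= r < n_rows and 0 <= c < n_cols:
                cells.append((r, c))
    return cells

assert set(neighbors_sketch(1, 1, 3, 3)) == {
    (0, 0), (0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1), (2, 2)}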
Example #5
def test_next_status_die_overpopulation():
    b = Board()
    b.cells[0][1] = True
    b.cells[1][2] = True
    b.cells[0][0] = True
    b.cells[1][0] = True
    b.cells[1][1] = True
    assert b.get_next_status(1, 1) is False
Example #6
def run():
    n = 4
    width, height = 6, 6
    model_file = os.path.join(results_dir, "zero_17_4_15:36",
                              "policy_1450.model")
    try:
        board = Board(width=width, height=height, n_in_row=n)
        game = Game(board)

        # ############### human VS AI ###################
        # load the trained policy_value_net in either Theano/Lasagne, PyTorch or TensorFlow

        best_policy = PolicyValueNet(width, height, model_file=model_file)
        mcts_player = MCTSPlayer(best_policy.policy_value_fn,
                                 c_puct=5,
                                 n_playout=400)

        # uncomment the following line to play with pure MCTS (it's much weaker even with a larger n_playout)
        # mcts_player = MCTS_Pure(c_puct=5, n_playout=1000)

        # human player, input your move in the format: 2 3
        human = Human()

        # set start_player=0 for human first
        game.start_play(human, mcts_player, start_player=1, is_shown=1)
    except KeyboardInterrupt:
        print('\n\rquit')
Example #7
def test_next_status_relive():
    b = Board()
    b.cells[0][1] = True
    b.cells[1][2] = True
    b.cells[0][0] = True
    b.cells[1][1] = False
    assert b.get_next_status(1, 1) is True
Example #8
def test_next_status_stay_alive_3():
    b = Board()
    b.cells[0][1] = True
    b.cells[1][2] = True
    b.cells[0][0] = True
    b.cells[1][1] = True
    assert b.get_next_status(1, 1) is True
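The three `get_next_status` tests above (die of overpopulation, relive, stay alive) follow Conway's Game of Life rules. A standalone sketch of the transition rule they exercise (hypothetical helper, not the project's implementation):

def next_status_sketch(alive: bool, n_alive_neighbors: int) -> bool:
    if alive:
        # A live cell survives with 2 or 3 live neighbors, otherwise it dies.
        return n_alive_neighbors in (2, 3)
    # A dead cell comes to life with exactly 3 live neighbors.
    return n_alive_neighbors == 3

assert next_status_sketch(True, 4) is False   # overpopulation (Example #5)
assert next_status_sketch(False, 3) is True   # relive (Example #7)
assert next_status_sketch(True, 3) is True    # stay alive (Example #8)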
Example #9
    def _test_swipe(self, before, left, right, up, down):
        """Helper function. All arguments are strings"""
        bfs = board_from_string
        actions = [Action.LEFT, Action.RIGHT, Action.UP, Action.DOWN]
        for s, a in zip([left, right, up, down], actions):
            B0 = bfs(before)
            actual = Board.apply_action_on_board(B0, a)
            expected = board_from_string(s)

            err_msg = f"Action {a}: Got {actual.render()}, expected {expected.render()}"
            assert actual == expected, err_msg
Example #10
    def step(self, action: int):
        """A single step in the game

        rewards: the natural logarithm of difference of the 2048 scoring.
        We also add some penalty in case of no-op
        """
        self._step_counter += 1

        done = False
        if self._step_counter > 100 and self._step_counter / (self.t + 1) > 5:
            # add 1 to avoid DivisionByZero for `self.t`. Yes, it happened.
            done = True

        action = Action(1 + action)  # discrete -> enum (which is 1-indexed)
        modified_board = Board.apply_action_on_board(self.board, action)
        info = {"step": self.t}

        if Board.is_game_over(modified_board):
            done = True
            reward = 0
        else:

            # An action is invalid if it doesn't change the board.
            valid_action = modified_board != self.board
            if not valid_action:
                # We penalize the agent for doing no-op moves!!! >:(
                penalty = -0.1
                info["no-op"] = True
            else:
                modified_board = Board.spawn_random_tile(modified_board)
                penalty = 0
                self.t += 1
                info["no-op"] = False

            diff = modified_board.score - self.board.score

            reward = np.log(1 + diff) + penalty
            reward = np.clip(reward, -11, 10)  # TODO: move to a wrapper.

        self.board = modified_board
        return self.board, reward, done, info
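The reward above is the natural log of (1 + score difference), plus a -0.1 penalty for no-op moves, clipped to [-11, 10]. A quick numeric illustration of that formula in plain numpy (sketch only, outside the env):

import numpy as np

def reward_sketch(score_before, score_after, is_noop):
    penalty = -0.1 if is_noop else 0.0
    reward = np.log(1 + (score_after - score_before)) + penalty
    return float(np.clip(reward, -11, 10))

print(reward_sketch(100, 100, True))    # -0.1: board unchanged, only the penalty
print(reward_sketch(100, 132, False))   # ~3.5: log(1 + 32)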
Example #11
def board_from_string(s, shape=None, dtype=np.int32) -> Board:
    """Utility function to create boards that are visually easy to see + validate

    Args:
        s: the string we want to make a board from
        shape: if None, we try to make a square array of `s`
    """
    sep = " "
    arr = np.fromstring(re.sub(r"\s+", sep, s.strip("\n")), sep=sep)
    shape = repeat(int(np.sqrt(arr.size)), 2) if shape is None else shape
    arr = arr.astype(dtype).reshape(*shape)
    return Board.from_array(arr)
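Usage matches the tests above: a whitespace-separated string with a square number of entries becomes a square board, and `shape` overrides the inferred shape. An illustrative call (assuming the `Board.from_array` / `as_array` round-trip seen in Example #1):

board = board_from_string("""1 2
                             3 4""")
assert board.as_array().shape == (2, 2)

row_board = board_from_string("1 2 3 4", shape=(1, 4))
assert row_board.as_array().shape == (1, 4)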
Example #12
    def observation(self, board):
        """Convert observation to numpy array with a unique channel for each tile.

        A `Board` cannot be used as an observation. RLlib will complain and crash
        because RLlib expects arrays as observations. Therefore, we convert the
        `Board` to a numpy array, where the first channel has value 1 if the cell
        is empty. The second channel corresponds to tiles with value 2, the third
        to tiles with value 4, and so on.

        The number of channels in the observation is `1 + log2(max_tile_value)`.
        For example, `max_tile_value == 256` --> 9 channels.

        Note:
            We assume all tile values are powers of 2!

        Returns:
            A dict with the following keys and values:
                - 'valid_action_mask': np.ndarray(4, float)
                    A binary mask over the 4 actions (1.0 means the action is available).
                - 'board': np.ndarray((n_rows, n_cols, n_channels), float)
                    The board (in one-hot format).
        """
        channel_indices = np.log2(np.where(board.values == 0, 1, board.values))

        frac_values, _ = np.modf(channel_indices)
        if not frac_values.max() == 0:
            raise ValueError(
                "Unexpected input: got a tile that was not a power of 2. Can't "
                "safely convert observation.")
        channel_indices = channel_indices.astype(int)

        yy, xx = np.meshgrid(*[range(dim) for dim in channel_indices.shape])

        one_hot_board = np.zeros(self.env.observation_space["board"].shape)
        if K.image_data_format() == "channels_first":
            one_hot_board[channel_indices.ravel(),
                          yy.ravel(),
                          xx.ravel()] = 1.0
        else:
            one_hot_board[yy.ravel(),
                          xx.ravel(),
                          channel_indices.ravel()] = 1.0

        valid_action_mask = np.zeros(4)
        for action in Board.get_available_actions(board):
            index = action.value - 1  # enums are 1-indexed, so we subtract by 1.
            valid_action_mask[index] = 1.0

        processed_obs = {
            "valid_action_mask": valid_action_mask,
            "board": one_hot_board
        }
        return processed_obs
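The channel index of each tile in the one-hot board is log2 of its value, with empty cells mapped to channel 0. A standalone numpy illustration of that mapping (independent of the wrapper above):

import numpy as np

values = np.array([[0, 2],
                   [4, 256]])
channel_indices = np.log2(np.where(values == 0, 1, values)).astype(int)
# -> [[0, 1],
#     [2, 8]]  i.e. empty -> channel 0, tile 2 -> channel 1, tile 256 -> channel 8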
Example #13
def test_blinker():
    b = Board()
    b.cells[2][1] = True
    b.cells[2][2] = True
    b.cells[2][3] = True
    b.next_step()
    assert all((
        b.cells[3][2],
        b.cells[2][2],
        b.cells[1][2],
    ))
Example #14
def test_available_actions_work_as_expected():
    s = """1 2 3
           1 4 5
           6 7 8"""
    board = board_from_string(s)
    available_actions = Board.get_available_actions(board)
    assert available_actions == set([Action.DOWN, Action.UP])

    s = """ 1  2  3  4
            5  6  7  8
            9 10 11 12
           13 14 15 16"""
    board = board_from_string(s)
    available_actions = Board.get_available_actions(board)
    assert available_actions == set([])

    s = """ 1  2  2  4
            5  2  7  8
            9 10 11 12
           13 14 15 16"""
    board = board_from_string(s)
    available_actions = Board.get_available_actions(board)
    assert available_actions == set([Action.LEFT, Action.RIGHT, Action.UP, Action.DOWN])
Example #15
    def reset(self):
        self.t = 0
        self._step_counter = 0
        self.board = Board.init_random(shape=self.env_config["board_shape"],
                                       n_tiles=2)
        return self.board
Example #16
    def test_move(self):

        board = Board()
        board.move(PLAYER1, 2, 1)
        assert board.board_state[2][1] == PLAYER1
Example #17
def test_alive_neighbors():
    b = Board()
    b.cells[0][1] = True
    assert b.get_alive_neighbors(1, 1) == [(0, 1)]
Example #18
def train_nn(data_filename,
             board_height=6,
             board_width=6,
             n_in_row=4,
             batch_size=32,
             epochs=15,
             learning_rate=5e-3,
             check_freq=200):

    train = True

    dataset = Dataset(file_name=data_filename, default_bs=batch_size, n_samples=2000, augument=True)


    if train:
        teacher = PolicyValueNet(board_width=board_width,
                                 board_height=board_height,
                                 model_file=os.path.join(results_dir, 'zero_17_4_15:36', 'policy_1500.model'))

        test_x = get_test_x()

        teacher_probs, teacher_value = teacher.policy_value(test_x)
        # teacher_move = np.random.choice(np.arange(36), p=teacher_probs)

        tf.reset_default_graph()

        student = PolicyValueNet(board_width=board_width, board_height=board_height)

        save_dir = os.path.join(results_dir, "student")

        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
    else:
        student = PolicyValueNet(board_width=board_width,
                                 board_height=board_height,
                                 #model_file=os.path.join(results_dir, 'zero_17_4_15:36', 'policy_1500.model'))
                                 model_file=os.path.join(results_dir, "student", 'current_policy.model'))

    board = Board(width=board_width,
                  height=board_height,
                  n_in_row=n_in_row)
    game = Game(board)

    pure_mcts_playout_num = 1000
    c_puct = 5
    n_playouts = 400
    n_games = 10
    log_freq = 50

    counter = 0
    correct_counter = 0
    start_time = time()

    if train:

        while dataset.current_epoch < epochs:

            states, probs, winners = dataset.next_batch()

            # Check the later milestone first; otherwise the 5e-4 branch is unreachable.
            if dataset.current_epoch >= 30:
                learning_rate = 5e-4
            elif dataset.current_epoch >= 10:
                learning_rate = 1e-3

            loss, entropy = student.train_step(states, probs, winners, lr=learning_rate)

            if (counter + 1) % log_freq == 0:
                logger.info("{0:} time: {1:4.3f}, loss: {2:.4f}, entropy: {3:.4f}".format(
                    dataset.get_progress(), time() - start_time, loss, entropy))

            counter += 1

            if (counter + 1) % check_freq == 0:
                student_probs, student_value = student.policy_value(test_x)
                # student_move = np.random.choice(np.arange(36), p=student_probs)



                kl = d_kl(student_probs, teacher_probs)
                mse = np.mean(np.square(student_value - teacher_value))
                prob_mse = np.mean(np.square(student_probs - teacher_probs))

                max_abs = np.max(np.abs(student_probs - teacher_probs))

                logger.info("evaluation: Dkl: {:.4}, MSE: {:.4}, prob_mse: {:.5}, max: {:.3}".format(kl, mse, prob_mse, max_abs))


        student.save_model(os.path.join(save_dir, 'current_policy.model'))

    winners = []

    current_mcts_player = MCTSPlayer(student.policy_value_fn,
                                     c_puct=c_puct,
                                     n_playout=n_playouts,
                                     is_selfplay=False)

    pure_mcts_player = MCTS_Pure(c_puct=5, n_playout=pure_mcts_playout_num)

    with trange(n_games) as t:
        for i in t:
            winner = game.start_play(pure_mcts_player,
                                     current_mcts_player,
                                     start_player=1,
                                     is_shown=0)

            winners.append(winner)

            t.set_postfix(wins=sum(np.array(winners) == 2))

    logger.info("Evaluation: n_playouts:{}, wins: {}, ties:{}".format(pure_mcts_playout_num,
                                                                      sum(np.array(winners) == 2),
                                                                      sum(np.array(winners) == -1)))
Example #19
                    default=1,
                    help='An initial temperature')

args = parser.parse_args()
if args.init_model == 'best_policy.model':
    files = os.listdir(results_dir)
    files.sort()
    most_recent = files[-1]
    args.init_model = os.path.join(results_dir, most_recent,
                                   'best_policy.model')
else:
    args.init_model = os.path.join(results_dir, args.init_model)

logger.info(args)
board = Board(width=args.board_width,
              height=args.board_height,
              n_in_row=args.n_in_row)
game = Game(board)
policy_value_net = PolicyValueNet(args.board_width,
                                  args.board_height,
                                  model_file=args.init_model)
mcts_player = MCTSPlayer(policy_value_net.policy_value_fn,
                         c_puct=args.c_puct,
                         n_playout=args.n_playout,
                         is_selfplay=1)

states_buf, probs_buf, winners_buf = [], [], []
counter = 0
while len(states_buf) < args.n_examples:
    _, (states, probs, winners) = game.start_self_play(mcts_player,
                                                       temp=args.temperature,
Example #20
    def setup(self, app_root):
        self.board = Board(app_root)
Example #21
class NineMenMorrisGame(Widget):


    def callback(instance):
        if instance.text == "Person":
            print('p')
            NineMenMorrisGame.against = "person"
        elif instance.text == "AI":
            print('a')
            NineMenMorrisGame.against = "ai"
        NineMenMorrisGame.phase = 0
        NineMenMorrisGame.popup.dismiss()



    box = BoxLayout(orientation='vertical', padding=(10))
    box.add_widget(Label(text="Play against a person or AI?", font_size=13))
    popup = Popup(title='Select Opponent', title_size=(30),
                  title_align='center', content=box,
                  size_hint=(None, None), size=(200, 200),
                  auto_dismiss=False)
    box.add_widget(Button(text="Person", on_press=callback))
    box.add_widget(Button(text="AI", on_press=callback))
    popup.open()

    phase = 2
    against = "none"
    turn = 1
    validTurn = False
    lastPhase = 0


    white1 = ObjectProperty(None)
    white2 = ObjectProperty(None)
    white3 = ObjectProperty(None)
    white4 = ObjectProperty(None)
    white5 = ObjectProperty(None)
    white6 = ObjectProperty(None)
    white7 = ObjectProperty(None)
    white8 = ObjectProperty(None)
    white9 = ObjectProperty(None)

    black1 = ObjectProperty(None)
    black2 = ObjectProperty(None)
    black3 = ObjectProperty(None)
    black4 = ObjectProperty(None)
    black5 = ObjectProperty(None)
    black6 = ObjectProperty(None)
    black7 = ObjectProperty(None)
    black8 = ObjectProperty(None)
    black9 = ObjectProperty(None)

    def setup(self, app_root):
        self.board = Board(app_root)



    def on_touch_down(self, touch):
        # Black turn
        if not self.turn % 2:
            print('phase {}'.format(self.phase))
            self.board.prevBlackMills = self.board.blackMills()
            #Human Player for Black Pieces
            # Placement phase for person
            if self.against == "person" and self.phase == 0:
                pieceName = 'black' + str(int((self.turn + 1) / 2))
                piece = getattr(self, pieceName)
                self.validTurn = self.board.place(piece, touch)

            # Moving phase for person
            if self.against == "person" and self.phase == 1:
                if self.board.selected:
                    self.validTurn = self.board.move(touch, 'black')
                else:
                    self.board.select(touch, 'black')

            # Removing phase for person
            if self.against == "person" and self.phase == 2:
                self.validTurn = self.board.remove(touch, 'white')
                if self.validTurn:
                    self.phase = self.lastPhase

            # AI Player for Black Pieces
            # Placement phase for ai
            if self.against == "ai" and self.phase == 0:
                pieceName = 'black' + str(int((self.turn + 1) / 2))
                piece = getattr(self, pieceName)
                #self.validTurn = self.board.place(piece, touch)
                self.validTurn = self.board.placeAI(piece)

            # Moving phase for ai
            if self.against == "ai" and self.phase == 1:
                if self.board.selected:
                    self.validTurn = self.board.moveAI('black')
                else:
                    self.board.selectAI('black')

            # Removing phase for ai
            if self.against == "ai" and self.phase == 2:
                #self.validTurn = self.board.remove(touch, 'white')
                self.validTurn = self.board.removeAI('white')
                if self.validTurn:
                    self.phase = self.lastPhase



            # Check for new mills
            if self.board.blackMills() > self.board.prevBlackMills:
                print('black made a new mill')
                self.validTurn = False  # still your turn
                self.lastPhase = self.phase  # last phase
                self.phase = 2  # next click will be removal

        else:
            print('phase {}'.format(self.phase))
            self.board.prevWhiteMills = self.board.whiteMills()

            # Placement phase
            if self.phase == 0:
                pieceName = 'white' + str(int((self.turn + 1) / 2))
                piece = getattr(self, pieceName)
                self.validTurn = self.board.place(piece, touch)

            # Moving phase
            if self.phase == 1:
                if self.board.selected:
                    self.validTurn = self.board.move(touch, 'white')
                else:
                    self.board.select(touch, 'white')

            # Removing phase
            if self.phase == 2:
                self.validTurn = self.board.remove(touch, 'black')
                if self.validTurn:
                    self.phase = self.lastPhase

            # Check for new mills
            if self.board.whiteMills() > self.board.prevWhiteMills:
                print('white made a new mill')
                self.validTurn = False  # still your turn
                self.lastPhase = self.phase  # last phase
                self.phase = 2  # next click will be removal

        if self.validTurn:
            self.turn += 1
            self.validTurn = False
            if self.turn % 2:
                print('black - phase {} - mills {}'.format(self.phase, self.board.blackMills()))
            else:
                print('white - phase {} - mills {}'.format(self.phase, self.board.whiteMills()))

        if self.turn > 18 and self.phase != 2:
            self.phase = 1

        if self.board.trashedBlack >= 7 or self.board.trashedWhite >= 7:
            print('game over')
Example #22
    def test_checkStatus_in_progress(self):

        board = Board()

        board.move(PLAYER1, 0, 0)
        board.move(PLAYER1, 0, 1)
        board.move(PLAYER2, 0, 2)
        board.move(PLAYER2, 1, 0)
        board.move(PLAYER1, 1, 1)
        board.move(PLAYER1, 1, 2)
        board.move(PLAYER1, 2, 0)
        board.move(PLAYER2, 2, 1)

        emptyPositions = board.get_empty_positions()

        assert len(emptyPositions) == 1
        assert board.checkStatus() == -1
Example #23
def test_create_board():
    b = Board()
    assert len(b.cells) == Config.board_size[0]
    assert len(b.cells[0]) == Config.board_size[1]
    assert not any((any(c) for c in b.cells))
Example #24
0
    def test_checkStatus_player_one_win(self):

        board = Board()

        board.move(PLAYER1, 0, 0)
        board.move(PLAYER1, 0, 1)
        board.move(PLAYER2, 0, 2)
        board.move(PLAYER2, 1, 0)
        board.move(PLAYER1, 1, 1)
        board.move(PLAYER1, 1, 2)
        board.move(PLAYER2, 2, 0)
        board.move(PLAYER1, 2, 1)

        emptyPositions = board.get_empty_positions()

        assert len(emptyPositions) == 0
        assert board.checkStatus() == 1
Example #25
    def __init__(self,
                 init_model=None,
                 board_width=6,
                 board_height=6,
                 n_in_row=4,
                 learning_rate=2e-3,
                 n_playouts=400,
                 batch_size=512,
                 train_steps=5,
                 check_freq=100,
                 n_iters=1500,
                 save_dir=None,
                 debug=False):

        # params of the board and the game
        self.board_width = board_width
        self.board_height = board_height
        self.n_in_row = n_in_row

        self.board = Board(width=self.board_width,
                           height=self.board_height,
                           n_in_row=self.n_in_row)
        self.game = Game(self.board)

        # training params
        self.learning_rate = learning_rate
        self.lr_multiplier = 1.0  # adaptively adjust the learning rate based on KL
        self.initial_temp = 1.0  # the temperature param

        self.n_playouts = n_playouts  # num of simulations for each move

        self.c_puct = 5
        self.buffer_size = 10000
        self.batch_size = batch_size  # mini-batch size for training

        self.states_buffer = deque(maxlen=self.buffer_size)
        self.probs_buffer = deque(maxlen=self.buffer_size)
        self.winners_buffer = deque(maxlen=self.buffer_size)

        self.play_batch_size = 1
        self.train_steps = train_steps  # num of train_steps for each update
        self.kl_targ = 0.02
        self.check_freq = check_freq
        self.game_batch_num = n_iters
        self.best_win_ratio = 0.0
        # num of simulations used for the pure mcts, which is used as
        # the opponent to evaluate the trained policy
        self.pure_mcts_playout_num = 1000

        self.save_dir = save_dir
        self.debug = debug

        self.save_freq = 100

        if init_model:
            # start training from an initial policy-value net
            self.policy_value_net = PolicyValueNet(self.board_width,
                                                   self.board_height,
                                                   model_file=init_model)
        else:
            # start training from a new policy-value net
            self.policy_value_net = PolicyValueNet(self.board_width,
                                                   self.board_height)

        self.mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn,
                                      c_puct=self.c_puct,
                                      n_playout=self.n_playouts,
                                      is_selfplay=True)