Esempio n. 1
0
def select_action(policy, board: Board, cuda=False, noise=0):
    """Sample a move for the side to play from the policy network's output.

    The flattened board matrix is fed to ``policy``. The resulting scores are
    multiplied by a 0/1 mask that keeps only the actions listed in
    ``board.valid_actions()``. If a valid action makes ``board.turn()`` the
    winner right away, the mask is reduced to that single action (the last
    such action found). ``noise`` is added to every action left in the mask.

    Returns the result of ``multinomial()`` on the masked scores.
    """
    # Feed the flattened board through the network as a batch of one.
    flat = board.matrix().reshape(BOARD_ROWS * BOARD_COLS)
    state = torch.from_numpy(flat).float().unsqueeze(0)
    if cuda:
        state = state.cuda()
    probs = policy(Variable(state))

    # Mask with ones at the actions that are currently allowed.
    mask_np = np.zeros(len(POSSIBLE_ACTIONS), dtype=np.float32)
    legal = board.valid_actions()
    for action in POSSIBLE_ACTIONS:
        if action in legal:
            mask_np[action] = 1

    # An immediately winning move overrides everything else.
    winning = [
        action for action in legal
        if board.insert(action).winner() == board.turn()
    ]
    if winning:
        mask_np = np.zeros(len(POSSIBLE_ACTIONS), dtype=np.float32)
        mask_np[winning[-1]] = 1

    mult = Variable(torch.from_numpy(mask_np))
    noise_term = Variable(torch.from_numpy(mask_np * noise))
    if cuda:
        mult = mult.cuda()
        noise_term = noise_term.cuda()

    probs = probs * mult + noise_term
    no_legal_mass = torch.sum(probs * mult).data[0] < 1e-40
    if no_legal_mass:
        # The network put no weight on any allowed action, so fall back to
        # the mask itself and pick among the allowed actions.
        probs = probs + mult
    return probs.multinomial()
Esempio n. 2
0
def test_equals():
    """Boards parsed from the same drawing are equal; one changed cell is not."""
    full_grid = """
        |O|X|O|X|O|X|O|
        |X|O|X|O|X|O|X|
        |O|X|O|X|O|X|O|
        |X|O|X|O|X|O|X|
        |O|X|O|X|O|X|O|
        |X|O|X|O|X|O|X|
        """
    # Same as above except for the top-left cell.
    altered_grid = """
        |X|X|O|X|O|X|O|
        |X|O|X|O|X|O|X|
        |O|X|O|X|O|X|O|
        |X|O|X|O|X|O|X|
        |O|X|O|X|O|X|O|
        |X|O|X|O|X|O|X|
        """
    b1 = Board.parse(full_grid)
    b2 = Board.parse(full_grid)
    b3 = Board.parse(altered_grid)
    assert b1 == b2
    assert b2 != b3
    assert b1 != b3
Esempio n. 3
0
def play_game():
    """Play an interactive game on a fresh board, reading columns from stdin.

    Each input line is a 1-based column number; entering "b" stops early.
    The loop also ends once ``board.winner`` is no longer 0 or no moves are
    available. Afterwards a separator, the final board and the list of
    0-based columns played are printed.
    """
    board = Board()
    history = []
    while board.winner == 0 and board.available_moves() != []:
        entry = input()
        if entry == "b":
            break
        column = int(entry) - 1  # input is 1-based, the board is 0-based
        board.add_token(column)
        history.append(column)
        print(board)

    for item in ('-' * 30, board, history):
        print(item)
Esempio n. 4
0
 def game_board(self):
     """Size the board from ``self.height`` and store it in ``self.board``.

     The height is rounded down to a multiple of 42 and the width is 7/6 of
     that height. The four corners are placed symmetrically around (0, 0).
     """
     height = (self.height // 42) * 42
     width = (height * 7) / 6
     half_w, half_h = width / 2, height / 2
     corners = [
         (-half_w, half_h),
         (-half_w, -half_h),
         (half_w, -half_h),
         (half_w, half_h),
     ]
     self.board = Board(self, corners, width, height, half_w, half_h)
Esempio n. 5
0
 def _train(self, current_player, against):
     """Play one game against ``against`` and feed the result back.

     Moves alternate starting with ``current_player``. On each turn the side
     equal to ``self.player`` is moved by ``self.move`` and the other side by
     ``against.move``. Once the board reports game over, the winner is
     passed to the reward hook, the reset hook runs, and the winner is
     returned.
     """
     board = Board()
     while not board.is_game_over():
         # Any value other than PLAYER1 is treated as PLAYER2's turn.
         if current_player == PLAYER1:
             side, upcoming = PLAYER1, PLAYER2
         else:
             side, upcoming = PLAYER2, PLAYER1
         mover = self if self.player == side else against
         board.add_token(mover.move(board))
         current_player = upcoming
     winner = board.winner
     self.__feed_reward(winner)
     self.__reset()
     return winner
Esempio n. 6
0
    def game_loop(self):
        """Run the pygame loop until ``self.game_over`` is set.

        Draws the initial board, then on every pass: exits the process on a
        QUIT event, lets the side to move act through ``self._choose`` and
        hands the turn to the other side. When the board reports game over,
        the result is printed, ``pygame.time.wait(5000)`` is called and
        ``self.game_over`` is set.
        """
        board = Board()
        turn = PLAYER1
        self.__draw_board(board)
        while not self.game_over:
            if any(event.type == pygame.QUIT for event in pygame.event.get()):
                sys.exit()

            if turn in (PLAYER1, PLAYER2):
                self._choose(board, turn)
                turn = PLAYER2 if turn == PLAYER1 else PLAYER1

            if not board.is_game_over():
                continue

            if board.winner == NO_ONE:
                print('Draw')
            else:
                print(f'Winner: {board.winner}. player')
            pygame.time.wait(5000)
            self.game_over = True
Esempio n. 7
0
def test_getitem():
    """Indexing by (row, column) from the top-left returns the coin, or None."""
    b = Board.parse("""
        |O|O|O|X|O|X|O|
        |X|O|X|O|X|O|X|
        |X|X|O|X|O|X|O|
        |X|O|X|X| |O|X|
        |O|X|O|X|O|X|O|
        |X|O|X|O|X|O|X|
        """)
    occupied = (
        ((0, 0), 'O'),
        ((1, 0), 'X'),
        ((2, 0), 'X'),
        ((0, 1), 'O'),
        ((0, 2), 'O'),
    )
    for position, coin in occupied:
        assert b[position] == coin
    # The only blank cell in the drawing.
    assert b[3, 4] is None
    assert b[5, 6] == 'X'
Esempio n. 8
0
def test_next_boards():
    """next_boards() yields one successor per column except the full column 2."""
    b = Board.parse("""
        | | |O| | | | |
        | | |X| | | | |
        | | |O| | | | |
        | | |X| | | | |
        | | |O| | | | |
        | | |X|O|X|O| |
        """)
    open_columns = (0, 1, 3, 4, 5, 6)
    assert b.next_boards() == [b.insert(column) for column in open_columns]
Esempio n. 9
0
def play_game(player1, player2):
    """Let two players alternate moves on a fresh board until the game is over.

    ``player1`` moves first. One '.' is printed per loop pass, then a newline
    and the final board. Returns ``board.winner``.
    """
    board = Board()
    current_player = PLAYER1

    while not board.is_game_over():
        if current_player in (PLAYER1, PLAYER2):
            first_to_move = current_player == PLAYER1
            mover = player1 if first_to_move else player2
            board.add_token(mover.move(board))
            current_player = PLAYER2 if first_to_move else PLAYER1
        print('.', end='')
    print()
    print(board)
    return board.winner
Esempio n. 10
0
def generate_session(policy, opponent, cuda=False, t_max=100):
    """
    Play one game until it ends or for at most t_max of our own moves.

    The policy always plays as Board.PLAYER_1 (random side selection is
    currently switched off). Each round records the board we moved from and
    the sampled action, then lets opponent(policy, board) produce the reply.

    returns: list of states, list of actions and the reward of the game
             (0. when the round limit is reached without a result)
    """
    states, actions = [], []
    total_reward = 0.

    board = Board()

    # Side selection is fixed for now; the random variant was
    # np.random.choice((Board.PLAYER_1, Board.PLAYER_2), 1).
    player = Board.PLAYER_1

    if player == Board.PLAYER_2:
        # We are player two, so one move is played before recording starts
        opening = select_action(policy, board, cuda)
        board = board.insert(opening.data[0][0])

    for _ in range(t_max):
        # Our move: remember the position we acted on and the action taken
        states.append(board)
        action = select_action(policy, board, cuda)
        actions.append(action)
        board = board.insert(action.data[0][0])

        outcome = board.winner()
        if outcome:
            if outcome == player:
                total_reward = REWARD_WIN
            elif outcome == '-':
                total_reward = REWARD_UNDECIDED
            else:
                # The opponent cannot have won on our move
                print("Invalid result")
            break

        # Opponent's reply
        board = opponent(policy, board)

        outcome = board.winner()
        if outcome:
            if outcome == '-':
                total_reward = REWARD_UNDECIDED
            elif outcome != player:
                total_reward = REWARD_LOOSE
            else:
                # We cannot have won on the opponent's move
                print("Invalid result")
            break

    return states, actions, total_reward
Esempio n. 11
0

def select_human_action(b):
    """Print the board, prompt for a column, and return it 0-based.

    The entered number is shifted down by one and wrapped with
    ``% BOARD_COLS``.
    """
    print_board(b)
    entered = click.prompt('Please enter a column', type=int)
    return (entered - 1) % BOARD_COLS


def do_human_action(b):
    """Keep asking the human for a column until the insert succeeds.

    A ``ValueError`` raised while prompting or inserting is shown in red and
    the prompt is repeated. Returns the board produced by ``b.insert``.
    """
    while True:
        try:
            next_board = b.insert(select_human_action(b))
        except ValueError as err:
            click.echo(click.style(str(err), fg='red'))
        else:
            return next_board


b = Board()

# Decide if computer is player 1 or 2
computer_player = Board.PLAYER_1

if computer_player == Board.PLAYER_2:
    # Computer is player two, let player one play first
    b = do_human_action(b)

while True:
    # Computer moves
    a = select_action(policy, b)
    b = b.insert(a.data[0][0])

    winner = b.winner()
    if winner:
Esempio n. 12
0
def test_parse(cols, drawing):
    """Parsing ``drawing`` must give a board whose ``state`` equals ``cols``."""
    assert Board.parse(drawing).state == cols
Esempio n. 13
0
def test_draw(cols, drawing):
    """Drawing a board built from ``cols`` must match ``drawing``.

    Both sides are compared after stripping; each line of ``drawing`` is
    stripped individually first.
    """
    rendered = Board(cols).draw().strip()
    stripped_lines = (line.strip() for line in drawing.split("\n"))
    expected = "\n".join(stripped_lines).strip()
    assert rendered == expected
Esempio n. 14
0
def test_insert_some_coins():
    """Insert coins one at a time, checking turn, state and valid actions."""
    every_column = (0, 1, 2, 3, 4, 5, 6)
    # (column played, side to move afterwards, expected state, valid actions)
    steps = [
        (3, 'X', [0, 0, 0, 0b01, 0, 0, 0], every_column),
        (2, 'O', [0, 0, 0b10, 0b01, 0, 0, 0], every_column),
        (2, 'X', [0, 0, 0b0110, 0b01, 0, 0, 0], every_column),
        (2, 'O', [0, 0, 0b100110, 0b01, 0, 0, 0], every_column),
        (2, 'X', [0, 0, 0b01100110, 0b01, 0, 0, 0], every_column),
        (2, 'O', [0, 0, 0b1001100110, 0b01, 0, 0, 0], every_column),
        # Sixth coin in column 2: it is no longer a valid action.
        (2, 'X', [0, 0, 0b011001100110, 0b01, 0, 0, 0], (0, 1, 3, 4, 5, 6)),
    ]

    b = Board()
    assert b.turn() == 'O'
    for column, to_move, state, valid in steps:
        b = b.insert(column)
        assert b.turn() == to_move
        assert b == Board(state)
        assert b.valid_actions() == valid
Esempio n. 15
0
def test_insert_coins_full():
    """Inserting into column 2 of this state must raise ValueError."""
    state = [0, 0, 0b011001100110, 0b01, 0, 0, 0]
    board = Board(state)
    with pytest.raises(ValueError):
        board.insert(2)
Esempio n. 16
0
def test_winner(winner, drawing):
    """The board parsed from ``drawing`` must report ``winner``."""
    assert Board.parse(drawing).winner() == winner
Esempio n. 17
0
class MyTestCase(unittest.TestCase):
    """Tests for Board: rendering, move handling, win detection, hashing, copying.

    Boards are compared through ``str(board)``: six bracketed rows, top row
    first. In these renderings the first token played shows as 'O' and the
    second as 'X'.
    """

    def setUp(self) -> None:
        """Create a new Board for each test."""
        super().setUp()
        self.board = Board()

    def test_empty_board(self):
        """A new board renders as six rows of blank cells."""
        self.assertEqual(str(self.board), ('[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  ,  ,  ,  ,  ,  ]\n'))

    def test_first_move(self):
        """The first token lands in the bottom row and renders as 'O'."""
        self.board.add_token(0)
        self.assertEqual(str(self.board), ('[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[O,  ,  ,  ,  ,  ,  ]\n'))

    def test_multiple_moves(self):
        """Tokens alternate between 'O' and 'X' and stack within a column."""
        for i in [0, 2, 1, 2, 6]:
            self.board.add_token(i)
        self.assertEqual(str(self.board), ('[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  , X,  ,  ,  ,  ]\n'
                                           '[O, O, X,  ,  ,  , O]\n'))

    def test_invalid_moves(self):
        """After six tokens fill column 1, a seventh add_token returns a falsy value."""
        for i in [1, 1, 1, 1, 1, 1]:
            self.board.add_token(i)
        self.assertEqual(str(self.board), ('[ , X,  ,  ,  ,  ,  ]\n'
                                           '[ , O,  ,  ,  ,  ,  ]\n'
                                           '[ , X,  ,  ,  ,  ,  ]\n'
                                           '[ , O,  ,  ,  ,  ,  ]\n'
                                           '[ , X,  ,  ,  ,  ,  ]\n'
                                           '[ , O,  ,  ,  ,  ,  ]\n'))
        self.assertFalse(self.board.add_token(1))

    def test_empty_board_available_moves(self):
        """A new board offers all seven columns."""
        self.assertEqual(self.board.available_moves(), [0, 1, 2, 3, 4, 5, 6])

    def test_one_full_column_available_moves(self):
        """A full column is left out of available_moves()."""
        for i in [1, 1, 1, 1, 1, 1]:
            self.board.add_token(i)
        self.assertEqual(str(self.board), ('[ , X,  ,  ,  ,  ,  ]\n'
                                           '[ , O,  ,  ,  ,  ,  ]\n'
                                           '[ , X,  ,  ,  ,  ,  ]\n'
                                           '[ , O,  ,  ,  ,  ,  ]\n'
                                           '[ , X,  ,  ,  ,  ,  ]\n'
                                           '[ , O,  ,  ,  ,  ,  ]\n'))
        self.assertEqual(self.board.available_moves(), [0, 2, 3, 4, 5, 6])

    def test_full_table_available_moves_and_draw(self):
        """A completely filled board has no moves left and winner NO_ONE."""
        for i in [
                3, 3, 3, 4, 1, 0, 0, 3, 2, 0, 6, 1, 4, 2, 1, 4, 4, 1, 0, 5, 5,
                3, 2, 0, 3, 2, 1, 4, 2, 5, 4, 0, 5, 5, 1, 6, 5, 2, 6, 6, 6, 6
        ]:
            self.board.add_token(i)
        self.assertEqual(str(self.board), ('[X, O, X, O, O, O, X]\n'
                                           '[X, O, O, X, X, X, O]\n'
                                           '[O, X, X, X, O, O, X]\n'
                                           '[X, O, O, O, X, X, O]\n'
                                           '[O, X, X, X, O, O, X]\n'
                                           '[X, O, O, O, X, X, O]\n'))
        self.assertEqual(self.board.available_moves(), [])
        self.assertEqual(self.board.winner, NO_ONE)

    def test_after_win_available_moves(self):
        """Once a game is won, add_token is rejected and no moves are offered."""
        for i in [2, 1, 2, 1, 2, 1, 2]:
            self.board.add_token(i)
        self.assertEqual(str(self.board), ('[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  , O,  ,  ,  ,  ]\n'
                                           '[ , X, O,  ,  ,  ,  ]\n'
                                           '[ , X, O,  ,  ,  ,  ]\n'
                                           '[ , X, O,  ,  ,  ,  ]\n'))
        self.assertEqual(self.board.is_game_over(), True)
        self.assertFalse(self.board.add_token(3))
        self.assertEqual(self.board.available_moves(), [])

    def test_no_winner_board(self):
        """A position without four in a row has winner NO_ONE and is not over."""
        for i in [0, 1, 2, 3, 2, 3, 2, 4, 6, 6]:
            self.board.add_token(i)
        self.assertEqual(str(self.board), ('[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  , O,  ,  ,  ,  ]\n'
                                           '[ ,  , O, X,  ,  , X]\n'
                                           '[O, X, O, X, X,  , O]\n'))
        self.assertEqual(self.board.winner, NO_ONE)
        self.assertEqual(self.board.is_game_over(), False)

    def test_player_one_win(self):
        """Four 'O' tokens in the bottom row make PLAYER1 the winner."""
        for i in [1, 6, 2, 6, 3, 5, 4]:
            self.board.add_token(i)
        self.assertEqual(str(self.board), ('[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  ,  ,  ,  ,  , X]\n'
                                           '[ , O, O, O, O, X, X]\n'))
        self.assertEqual(self.board.winner, PLAYER1)
        self.assertEqual(self.board.is_game_over(), True)

    def test_player_two_win(self):
        """Four 'X' tokens in the bottom row make PLAYER2 the winner."""
        for i in [0, 1, 6, 2, 6, 3, 5, 4]:
            self.board.add_token(i)
        self.assertEqual(str(self.board), ('[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  ,  ,  ,  ,  , O]\n'
                                           '[O, X, X, X, X, O, O]\n'))
        self.assertEqual(self.board.winner, PLAYER2)
        self.assertEqual(self.board.is_game_over(), True)

    def test_row_win(self):
        """A horizontal four above the bottom row is detected."""
        for i in [0, 1, 2, 1, 2, 3, 4, 5, 6, 1, 3, 6, 5, 0, 4]:
            self.board.add_token(i)
        self.assertEqual(str(self.board), ('[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ , X,  ,  ,  ,  ,  ]\n'
                                           '[X, X, O, O, O, O, X]\n'
                                           '[O, X, O, X, O, X, O]\n'))
        self.assertEqual(self.board.winner, PLAYER1)

    def test_column_win(self):
        """A vertical four starting at the bottom row is detected."""
        for i in [0, 1, 2, 1, 2, 3, 2, 4, 2]:
            self.board.add_token(i)
        self.assertEqual(str(self.board), ('[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  , O,  ,  ,  ,  ]\n'
                                           '[ ,  , O,  ,  ,  ,  ]\n'
                                           '[ , X, O,  ,  ,  ,  ]\n'
                                           '[O, X, O, X, X,  ,  ]\n'))
        self.assertEqual(self.board.winner, PLAYER1)

    def test_column_not_bottom_win(self):
        """A vertical four that starts above the bottom row is detected."""
        for i in [0, 1, 1, 2, 1, 2, 1, 2, 1]:
            self.board.add_token(i)
        self.assertEqual(str(self.board), ('[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ , O,  ,  ,  ,  ,  ]\n'
                                           '[ , O,  ,  ,  ,  ,  ]\n'
                                           '[ , O, X,  ,  ,  ,  ]\n'
                                           '[ , O, X,  ,  ,  ,  ]\n'
                                           '[O, X, X,  ,  ,  ,  ]\n'))
        self.assertEqual(self.board.winner, PLAYER1)

    def test_positive_diagonal(self):
        """A diagonal four rising from bottom-left to top-right is detected."""
        for i in [0, 1, 1, 2, 2, 3, 2, 3, 3, 5, 3]:
            self.board.add_token(i)
        self.assertEqual(str(self.board), ('[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  ,  , O,  ,  ,  ]\n'
                                           '[ ,  , O, O,  ,  ,  ]\n'
                                           '[ , O, O, X,  ,  ,  ]\n'
                                           '[O, X, X, X,  , X,  ]\n'))
        self.assertEqual(self.board.winner, PLAYER1)

    def test_negative_diagonal(self):
        """A diagonal four falling from top-left to bottom-right is detected."""
        for i in [0, 0, 0, 0, 1, 1, 2, 1, 4, 2, 6, 3]:
            self.board.add_token(i)
        self.assertEqual(str(self.board), ('[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[X,  ,  ,  ,  ,  ,  ]\n'
                                           '[O, X,  ,  ,  ,  ,  ]\n'
                                           '[X, X, X,  ,  ,  ,  ]\n'
                                           '[O, O, O, X, O,  , O]\n'))
        self.assertEqual(self.board.winner, PLAYER2)

    def test_diagonal_center(self):
        """A rising diagonal four that starts above the bottom row is detected."""
        for i in [3, 2, 4, 1, 1, 2, 2, 3, 3, 4, 3, 4, 6, 4, 4]:
            self.board.add_token(i)
        self.assertEqual(str(self.board), ('[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  ,  ,  , O,  ,  ]\n'
                                           '[ ,  ,  , O, X,  ,  ]\n'
                                           '[ ,  , O, O, X,  ,  ]\n'
                                           '[ , O, X, X, X,  ,  ]\n'
                                           '[ , X, X, O, O,  , O]\n'))
        self.assertEqual(self.board.winner, PLAYER1)

    def test_board_hash(self):
        """get_hash() returns the expected 42-character digit string for this position."""
        for i in [3, 2, 4, 1, 1, 2, 2, 3, 3]:
            self.board.add_token(i)
        self.assertEqual(str(self.board), ('[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  ,  ,  ,  ,  ,  ]\n'
                                           '[ ,  , O, O,  ,  ,  ]\n'
                                           '[ , O, X, X,  ,  ,  ]\n'
                                           '[ , X, X, O, O,  ,  ]\n'))
        self.assertEqual(self.board.get_hash(),
                         '000000000000000000000001100001220000221100')

    def test_deep_copy(self):
        """A deepcopy compares unequal to the original but renders the same."""
        for i in [3, 2, 4, 1, 1, 2, 2, 3, 3, 5, 4, 6]:
            self.board.add_token(i)
        copy = deepcopy(self.board)
        self.assertNotEqual(self.board, copy)
        self.assertEqual(str(self.board), str(copy))
Esempio n. 18
0
def test_coin_count(n_coins, drawing):
    """The board parsed from ``drawing`` must hold exactly ``n_coins`` coins."""
    assert Board.parse(drawing).number_of_coins() == n_coins
Esempio n. 19
0
 def setUp(self) -> None:
     """Run the parent setUp, then store a newly built Board in ``self.board``."""
     super().setUp()
     fresh_board = Board()
     self.board = fresh_board