def select_action(policy, board: Board, cuda=False, noise=0):
    """Sample an action from ``policy`` restricted to the board's valid actions.

    The flattened board matrix is fed to ``policy``; its output is multiplied
    by a 0/1 mask built from ``board.valid_actions()``. If inserting at some
    valid action makes ``board.turn()`` the winner, the mask is narrowed to
    that single action. ``noise`` is scaled by the mask and added on top.

    Returns whatever ``probs.multinomial()`` yields for the masked output.

    NOTE(review): ``Variable``, ``.data[0]`` and argument-less
    ``multinomial()`` look like pre-0.4 PyTorch API -- confirm the targeted
    version before modernising.
    """
    # Query the network with the board flattened to a single row.
    flat_cells = board.matrix().reshape(BOARD_ROWS * BOARD_COLS)
    state = torch.from_numpy(flat_cells).float().unsqueeze(0)
    if cuda:
        state = state.cuda()
    probs = policy(Variable(state))

    # Build the mask: 1 for every action the board currently allows.
    legal = board.valid_actions()
    mask_np = np.zeros(len(POSSIBLE_ACTIONS), dtype=np.float32)
    for action in POSSIBLE_ACTIONS:
        if action in legal:
            mask_np[action] = 1

    # A winning move overrides everything else; when several exist, the last
    # one visited is the one that stays in the mask.
    for action in legal:
        if board.insert(action).winner() == board.turn():
            mask_np = np.zeros(len(POSSIBLE_ACTIONS), dtype=np.float32)
            mask_np[action] = 1

    mult = Variable(torch.from_numpy(mask_np))
    noise_term = Variable(torch.from_numpy(mask_np * noise))
    if cuda:
        mult = mult.cuda()
        noise_term = noise_term.cuda()

    probs = probs * mult + noise_term

    if torch.sum(probs * mult).data[0] < 1e-40:
        # Nothing permitted carries any weight, so fall back to the bare mask
        # and sample among the allowed actions.
        probs = probs + mult

    return probs.multinomial()
def test_equals():
    """Boards parsed from the same drawing are equal; a differing one is not."""
    # NOTE(review): drawing literals are reproduced exactly as found in the
    # source; confirm whether row breaks were lost from them.
    alternating = """ |O|X|O|X|O|X|O| |X|O|X|O|X|O|X| |O|X|O|X|O|X|O| |X|O|X|O|X|O|X| |O|X|O|X|O|X|O| |X|O|X|O|X|O|X| """
    altered = """ |X|X|O|X|O|X|O| |X|O|X|O|X|O|X| |O|X|O|X|O|X|O| |X|O|X|O|X|O|X| |O|X|O|X|O|X|O| |X|O|X|O|X|O|X| """

    b1 = Board.parse(alternating)
    b2 = Board.parse(alternating)
    b3 = Board.parse(altered)

    assert b1 == b2
    assert b2 != b3
    assert b1 != b3
def play_game():
    """Play an interactive console game, reading one move per line from stdin.

    Each input line is a 1-based column number; entering ``"b"`` stops the
    game early. The loop ends once ``board.winner`` is no longer ``0`` or no
    moves remain. The final board and the list of played (0-based) columns are
    printed at the end.

    Input that is not an integer, or that names a column not listed in
    ``board.available_moves()``, is reported and asked for again instead of
    crashing or being recorded as a move that was never played.
    """
    board = Board()
    moves = []
    while board.winner == 0 and board.available_moves() != []:
        col = input()
        if col == "b":
            break

        try:
            column = int(col) - 1
        except ValueError:
            print(f'Invalid input: {col!r}')
            continue

        # Only play (and record) moves the board actually accepts, so that
        # `moves` always mirrors what happened on the board.
        if column not in board.available_moves():
            print(f'Column {col} is not available')
            continue

        board.add_token(column)
        moves.append(column)
        print(board)
        print('-' * 30)

    print(board)
    print(moves)
def game_board(self):
    """Compute the board geometry and store a new ``Board`` on ``self.board``.

    The board height is ``self.height`` rounded down to a multiple of 42, the
    width is 7/6 of that height, and the four corners are placed symmetrically
    around the origin.
    """
    height = (self.height // 42) * 42
    width = (height * 7) / 6

    # Half extents, used both for the corner list and passed on to Board.
    half_w, half_h = width / 2, height / 2
    corners = [
        (-half_w, half_h),
        (-half_w, -half_h),
        (half_w, -half_h),
        (half_w, half_h),
    ]

    self.board = Board(self, corners, width, height, half_w, half_h)
def _train(self, current_player, against):
    """Play one full game against ``against`` and return the winner.

    On every turn the side to move is compared with ``self.player``: when
    they match this object picks the column, otherwise ``against`` does.
    After the game the winner is passed to ``__feed_reward`` and ``__reset``
    is called.
    """
    board = Board()
    while not board.is_game_over():
        first_side_to_move = current_player == PLAYER1
        side = PLAYER1 if first_side_to_move else PLAYER2

        # Whoever owns the side to move chooses the column.
        mover = self if self.player == side else against
        column = mover.move(board)

        current_player = PLAYER2 if first_side_to_move else PLAYER1
        board.add_token(column)

    winner = board.winner
    self.__feed_reward(winner)
    self.__reset()
    return winner
def game_loop(self):
    """Run the main loop until ``self.game_over`` is set.

    Each pass drains the pygame event queue (exiting on QUIT), lets the side
    to move choose via ``_choose`` and, once the board reports the game as
    over, prints the result, waits and flags the loop to stop.
    """
    board = Board()
    turn = PLAYER1
    self.__draw_board(board)

    while not self.game_over:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                sys.exit()

        # Let the current side move, then hand the turn to the other side.
        if turn == PLAYER1 or turn == PLAYER2:
            self._choose(board, turn)
            turn = PLAYER2 if turn == PLAYER1 else PLAYER1

        if not board.is_game_over():
            continue

        if board.winner == NO_ONE:
            print('Draw')
        else:
            print(f'Winner: {board.winner}. player')
        pygame.time.wait(5000)
        self.game_over = True
def test_getitem():
    """Indexing a parsed board with an index pair yields the coin or ``None``."""
    b = Board.parse(""" |O|O|O|X|O|X|O| |X|O|X|O|X|O|X| |X|X|O|X|O|X|O| |X|O|X|X| |O|X| |O|X|O|X|O|X|O| |X|O|X|O|X|O|X| """)

    occupied = [
        ((0, 0), 'O'),
        ((1, 0), 'X'),
        ((2, 0), 'X'),
        ((0, 1), 'O'),
        ((0, 2), 'O'),
    ]
    for key, coin in occupied:
        assert b[key] == coin

    # The gap in the drawing must come back as None, not as a coin.
    assert b[3, 4] is None
    assert b[5, 6] == 'X'
def test_next_boards():
    """``next_boards`` lists one inserted board per column except column 2."""
    b = Board.parse(""" | | |O| | | | | | | |X| | | | | | | |O| | | | | | | |X| | | | | | | |O| | | | | | | |X|O|X|O| | """)

    expected_columns = (0, 1, 3, 4, 5, 6)
    expected = [b.insert(column) for column in expected_columns]

    assert b.next_boards() == expected
def play_game(player1, player2):
    """Let two players alternate moves on a fresh board and return the winner.

    A dot is printed after every pass of the loop; the final board is printed
    once the board reports the game as over.
    """
    board = Board()
    turn = PLAYER1

    while not board.is_game_over():
        if turn == PLAYER1:
            board.add_token(player1.move(board))
            turn = PLAYER2
        elif turn == PLAYER2:
            board.add_token(player2.move(board))
            turn = PLAYER1
        # Progress marker, kept on a single line.
        print('.', end='')

    print()
    print(board)
    return board.winner
def generate_session(policy, opponent, cuda=False, t_max=100):
    """Play one game, for at most ``t_max`` of our own moves.

    Our moves come from ``select_action``; the other side's replies come from
    ``opponent(policy, board)``, which returns the board after its move.

    returns: the boards seen before each of our moves, the actions chosen for
    them, and the reward assigned for the outcome (``0.`` when the game did
    not finish within ``t_max`` rounds).
    """
    states = []
    actions = []
    total_reward = 0.

    board = Board()

    # Our side is currently fixed to player 1 (random side selection is
    # disabled), so the branch below is not taken as written.
    player = Board.PLAYER_1
    if player == Board.PLAYER_2:
        # We are player two: one move is played before ours.
        opening = select_action(policy, board, cuda)
        board = board.insert(opening.data[0][0])

    for _ in range(t_max):
        # Our move
        states.append(board)
        action = select_action(policy, board, cuda)
        actions.append(action)
        board = board.insert(action.data[0][0])

        outcome = board.winner()
        if outcome:
            if outcome == player:
                total_reward = REWARD_WIN
            elif outcome == '-':
                total_reward = REWARD_UNDECIDED
            else:
                print("Invalid result")
            break

        # Other player's move
        board = opponent(policy, board)

        outcome = board.winner()
        if outcome:
            if outcome == '-':
                total_reward = REWARD_UNDECIDED
            elif outcome != player:
                total_reward = REWARD_LOOSE
            else:
                print("Invalid result")
            break

    return states, actions, total_reward
def select_human_action(b):
    """Show the board and prompt for a column.

    The entered integer is shifted by one and wrapped modulo ``BOARD_COLS``
    before being returned.
    """
    print_board(b)
    return (click.prompt('Please enter a column', type=int) - 1) % BOARD_COLS


def do_human_action(b):
    """Keep prompting until ``b.insert`` accepts the chosen column.

    A ``ValueError`` raised by the insert is echoed in red and the prompt is
    repeated. Returns the value of the successful ``b.insert`` call.
    """
    while True:
        try:
            return b.insert(select_human_action(b))
        except ValueError as e:
            click.echo(click.style(str(e), fg='red'))


b = Board()

# Decide if computer is player 1 or 2
computer_player = Board.PLAYER_1
if computer_player == Board.PLAYER_2:
    # Computer is player two, let player one play first
    b = do_human_action(b)

while True:
    # Computer moves
    a = select_action(policy, b)
    b = b.insert(a.data[0][0])
    winner = b.winner()
    # NOTE(review): the body of this check lies beyond the visible source.
    if winner:
def test_parse(cols, drawing):
    """Parsing ``drawing`` produces a board whose ``state`` equals ``cols``."""
    parsed_state = Board.parse(drawing).state
    assert parsed_state == cols
def test_draw(cols, drawing):
    """Drawing a board built from ``cols`` matches ``drawing`` line by line.

    Surrounding whitespace is ignored, both per expected line and around the
    whole text.
    """
    rendered = Board(cols).draw().strip()

    expected_lines = []
    for line in drawing.split("\n"):
        expected_lines.append(line.strip())
    expected = "\n".join(expected_lines).strip()

    assert rendered == expected
def test_insert_some_coins():
    """Inserting coins alternates the turn and updates state and valid actions.

    After the seventh insert, column 2 no longer appears among the valid
    actions.
    """
    every_column = (0, 1, 2, 3, 4, 5, 6)

    # (column to insert, turn afterwards, board state afterwards, valid actions)
    steps = [
        (3, 'X', [0, 0, 0, 0b01, 0, 0, 0], every_column),
        (2, 'O', [0, 0, 0b10, 0b01, 0, 0, 0], every_column),
        (2, 'X', [0, 0, 0b0110, 0b01, 0, 0, 0], every_column),
        (2, 'O', [0, 0, 0b100110, 0b01, 0, 0, 0], every_column),
        (2, 'X', [0, 0, 0b01100110, 0b01, 0, 0, 0], every_column),
        (2, 'O', [0, 0, 0b1001100110, 0b01, 0, 0, 0], every_column),
        (2, 'X', [0, 0, 0b011001100110, 0b01, 0, 0, 0], (0, 1, 3, 4, 5, 6)),
    ]

    b = Board()
    assert b.turn() == 'O'

    for column, turn_after, state_after, actions_after in steps:
        b = b.insert(column)
        assert b.turn() == turn_after
        assert b == Board(state_after)
        assert b.valid_actions() == actions_after
def test_insert_coins_full():
    """Inserting into column 2 of this board raises ``ValueError``."""
    board_state = [0, 0, 0b011001100110, 0b01, 0, 0, 0]
    packed_board = Board(board_state)

    with pytest.raises(ValueError):
        packed_board.insert(2)
def test_winner(winner, drawing):
    """The board parsed from ``drawing`` reports ``winner`` as its winner."""
    reported = Board.parse(drawing).winner()
    assert reported == winner
class MyTestCase(unittest.TestCase):
    """Checks Board rendering, move handling, winner detection and copying.

    Every test starts from a fresh ``Board`` created in ``setUp``.
    """

    def setUp(self) -> None:
        super().setUp()
        self.board = Board()

    def _drop(self, columns):
        """Add one token per entry of ``columns``, in order."""
        for column in columns:
            self.board.add_token(column)

    def _expect_layout(self, layout):
        """Assert that ``str(self.board)`` equals ``layout``."""
        self.assertEqual(str(self.board), layout)

    def test_empty_board(self):
        self._expect_layout('[ , , , , , , ]\n'
                            '[ , , , , , , ]\n'
                            '[ , , , , , , ]\n'
                            '[ , , , , , , ]\n'
                            '[ , , , , , , ]\n'
                            '[ , , , , , , ]\n')

    def test_first_move(self):
        self.board.add_token(0)
        self._expect_layout('[ , , , , , , ]\n'
                            '[ , , , , , , ]\n'
                            '[ , , , , , , ]\n'
                            '[ , , , , , , ]\n'
                            '[ , , , , , , ]\n'
                            '[O, , , , , , ]\n')

    def test_multiple_moves(self):
        self._drop([0, 2, 1, 2, 6])
        self._expect_layout('[ , , , , , , ]\n'
                            '[ , , , , , , ]\n'
                            '[ , , , , , , ]\n'
                            '[ , , , , , , ]\n'
                            '[ , , X, , , , ]\n'
                            '[O, O, X, , , , O]\n')

    def test_invalid_moves(self):
        self._drop([1] * 6)
        self._expect_layout('[ , X, , , , , ]\n'
                            '[ , O, , , , , ]\n'
                            '[ , X, , , , , ]\n'
                            '[ , O, , , , , ]\n'
                            '[ , X, , , , , ]\n'
                            '[ , O, , , , , ]\n')
        # A seventh token in the same column must be refused.
        self.assertFalse(self.board.add_token(1))

    def test_empty_board_available_moves(self):
        self.assertEqual(self.board.available_moves(), [0, 1, 2, 3, 4, 5, 6])

    def test_one_full_column_available_moves(self):
        self._drop([1] * 6)
        self._expect_layout('[ , X, , , , , ]\n'
                            '[ , O, , , , , ]\n'
                            '[ , X, , , , , ]\n'
                            '[ , O, , , , , ]\n'
                            '[ , X, , , , , ]\n'
                            '[ , O, , , , , ]\n')
        self.assertEqual(self.board.available_moves(), [0, 2, 3, 4, 5, 6])

    def test_full_table_available_moves_and_draw(self):
        self._drop([
            3, 3, 3, 4, 1, 0, 0,
            3, 2, 0, 6, 1, 4, 2,
            1, 4, 4, 1, 0, 5, 5,
            3, 2, 0, 3, 2, 1, 4,
            2, 5, 4, 0, 5, 5, 1,
            6, 5, 2, 6, 6, 6, 6,
        ])
        self._expect_layout('[X, O, X, O, O, O, X]\n'
                            '[X, O, O, X, X, X, O]\n'
                            '[O, X, X, X, O, O, X]\n'
                            '[X, O, O, O, X, X, O]\n'
                            '[O, X, X, X, O, O, X]\n'
                            '[X, O, O, O, X, X, O]\n')
        self.assertEqual(self.board.available_moves(), [])
        self.assertEqual(self.board.winner, NO_ONE)

    def test_after_win_available_moves(self):
        self._drop([2, 1, 2, 1, 2, 1, 2])
        self._expect_layout('[ , , , , , , ]\n'
                            '[ , , , , , , ]\n'
                            '[ , , O, , , , ]\n'
                            '[ , X, O, , , , ]\n'
                            '[ , X, O, , , , ]\n'
                            '[ , X, O, , , , ]\n')
        self.assertEqual(self.board.is_game_over(), True)
        # Once the game is over no further token is accepted.
        self.assertFalse(self.board.add_token(3))
        self.assertEqual(self.board.available_moves(), [])

    def test_no_winner_board(self):
        self._drop([0, 1, 2, 3, 2, 3, 2, 4, 6, 6])
        self._expect_layout('[ , , , , , , ]\n'
                            '[ , , , , , , ]\n'
                            '[ , , , , , , ]\n'
                            '[ , , O, , , , ]\n'
                            '[ , , O, X, , , X]\n'
                            '[O, X, O, X, X, , O]\n')
        self.assertEqual(self.board.winner, NO_ONE)
        self.assertEqual(self.board.is_game_over(), False)

    def test_player_one_win(self):
        self._drop([1, 6, 2, 6, 3, 5, 4])
        self._expect_layout('[ , , , , , , ]\n'
                            '[ , , , , , , ]\n'
                            '[ , , , , , , ]\n'
                            '[ , , , , , , ]\n'
                            '[ , , , , , , X]\n'
                            '[ , O, O, O, O, X, X]\n')
        self.assertEqual(self.board.winner, PLAYER1)
        self.assertEqual(self.board.is_game_over(), True)

    def test_player_two_win(self):
        self._drop([0, 1, 6, 2, 6, 3, 5, 4])
        self._expect_layout('[ , , , , , , ]\n'
                            '[ , , , , , , ]\n'
                            '[ , , , , , , ]\n'
                            '[ , , , , , , ]\n'
                            '[ , , , , , , O]\n'
                            '[O, X, X, X, X, O, O]\n')
        self.assertEqual(self.board.winner, PLAYER2)
        self.assertEqual(self.board.is_game_over(), True)

    def test_row_win(self):
        self._drop([0, 1, 2, 1, 2, 3, 4, 5, 6, 1, 3, 6, 5, 0, 4])
        self._expect_layout('[ , , , , , , ]\n'
                            '[ , , , , , , ]\n'
                            '[ , , , , , , ]\n'
                            '[ , X, , , , , ]\n'
                            '[X, X, O, O, O, O, X]\n'
                            '[O, X, O, X, O, X, O]\n')
        self.assertEqual(self.board.winner, PLAYER1)

    def test_column_win(self):
        self._drop([0, 1, 2, 1, 2, 3, 2, 4, 2])
        self._expect_layout('[ , , , , , , ]\n'
                            '[ , , , , , , ]\n'
                            '[ , , O, , , , ]\n'
                            '[ , , O, , , , ]\n'
                            '[ , X, O, , , , ]\n'
                            '[O, X, O, X, X, , ]\n')
        self.assertEqual(self.board.winner, PLAYER1)

    def test_column_not_bottom_win(self):
        self._drop([0, 1, 1, 2, 1, 2, 1, 2, 1])
        self._expect_layout('[ , , , , , , ]\n'
                            '[ , O, , , , , ]\n'
                            '[ , O, , , , , ]\n'
                            '[ , O, X, , , , ]\n'
                            '[ , O, X, , , , ]\n'
                            '[O, X, X, , , , ]\n')
        self.assertEqual(self.board.winner, PLAYER1)

    def test_positive_diagonal(self):
        self._drop([0, 1, 1, 2, 2, 3, 2, 3, 3, 5, 3])
        self._expect_layout('[ , , , , , , ]\n'
                            '[ , , , , , , ]\n'
                            '[ , , , O, , , ]\n'
                            '[ , , O, O, , , ]\n'
                            '[ , O, O, X, , , ]\n'
                            '[O, X, X, X, , X, ]\n')
        self.assertEqual(self.board.winner, PLAYER1)

    def test_negative_diagonal(self):
        self._drop([0, 0, 0, 0, 1, 1, 2, 1, 4, 2, 6, 3])
        self._expect_layout('[ , , , , , , ]\n'
                            '[ , , , , , , ]\n'
                            '[X, , , , , , ]\n'
                            '[O, X, , , , , ]\n'
                            '[X, X, X, , , , ]\n'
                            '[O, O, O, X, O, , O]\n')
        self.assertEqual(self.board.winner, PLAYER2)

    def test_diagonal_center(self):
        self._drop([3, 2, 4, 1, 1, 2, 2, 3, 3, 4, 3, 4, 6, 4, 4])
        self._expect_layout('[ , , , , , , ]\n'
                            '[ , , , , O, , ]\n'
                            '[ , , , O, X, , ]\n'
                            '[ , , O, O, X, , ]\n'
                            '[ , O, X, X, X, , ]\n'
                            '[ , X, X, O, O, , O]\n')
        self.assertEqual(self.board.winner, PLAYER1)

    def test_board_hash(self):
        self._drop([3, 2, 4, 1, 1, 2, 2, 3, 3])
        self._expect_layout('[ , , , , , , ]\n'
                            '[ , , , , , , ]\n'
                            '[ , , , , , , ]\n'
                            '[ , , O, O, , , ]\n'
                            '[ , O, X, X, , , ]\n'
                            '[ , X, X, O, O, , ]\n')
        self.assertEqual(self.board.get_hash(),
                         '000000000000000000000001100001220000221100')

    def test_deep_copy(self):
        self._drop([3, 2, 4, 1, 1, 2, 2, 3, 3, 5, 4, 6])
        clone = deepcopy(self.board)
        # The copy does not compare equal to the original but renders the same.
        self.assertNotEqual(self.board, clone)
        self.assertEqual(str(self.board), str(clone))
def test_coin_count(n_coins, drawing):
    """The board parsed from ``drawing`` reports ``n_coins`` coins."""
    counted = Board.parse(drawing).number_of_coins()
    assert counted == n_coins
def setUp(self) -> None:
    """Run the parent fixture setup, then store a new ``Board`` on ``self.board``."""
    super().setUp()
    # A new Board is assigned on every call, replacing any previous one.
    self.board = Board()