def test_reverse(self):
    """Playing (4, 5) on the initial board must leave 1 at (4, 4) and (4, 5)."""
    start = Board.init_board()

    result = put_and_reverse((4, 5), start)

    # Everything else is expected to match a fresh initial board.
    want = Board.init_board().board
    for row, col in ((4, 4), (4, 5)):
        want[row][col] = 1
    np.testing.assert_array_equal(want, result.board)
def test_multi_size_reverse(self):
    """A move at (4, 6) must fill (4, 4)..(4, 6) with 1 when (4, 5) holds -1."""
    start = Board.init_board()
    start.board[4][5] = -1

    result = put_and_reverse((4, 6), start)

    # Expected position: a fresh initial board with the whole run set to 1.
    want = Board.init_board().board
    for col in (4, 5, 6):
        want[4][col] = 1
    np.testing.assert_array_equal(want, result.board)
def test_select_node(self):
    """``select_node_ucb`` must pick the one child whose statistics were not set."""
    root = Node(Board.init_board())
    children = [Node(Board.init_board()) for _ in range(4)]
    root.children = children

    # Give the first three children identical statistics; the fourth stays
    # exactly as the constructor left it.
    for visited in children[:3]:
        visited.w = 2
        visited.n = 4

    chosen = select_node_ucb(root, 1.0)

    self.assertEqual(chosen, children[-1])
def test_index_array_of_bound(self):
    """A move on the top row must flip correctly with neighbours at the edge."""
    board = Board(np.zeros((8, 8)))
    for col, stone in ((0, -1), (2, -1), (3, 1)):
        board.board[0][col] = stone

    result = put_and_reverse((0, 1), board)

    # Top row is expected to read -1, 1, 1, 1; every other cell stays 0.
    want = np.zeros_like(board.board)
    want[0][0] = -1
    for col in (1, 2, 3):
        want[0][col] = 1
    np.testing.assert_array_equal(want, result.board)
def parse(s: str) -> Tuple[int, int]:
    """Translate a two-character coordinate string into a ``(row, col)`` pair.

    The first character is a column letter counted from ``"a"``; the second
    is a 1-based row digit. Both results are 0-indexed.

    Args:
        s: coordinate text; only ``s[0]`` and ``s[1]`` are read.

    Returns:
        ``(row, col)`` as integers.
    """
    col_char, row_char = s[0], s[1]
    col = ord(col_char) - ord("a")
    row = int(row_char) - 1  # convert to 0-indexed

    # Build a throwaway Hand purely so that its own index check runs on the
    # parsed coordinates before they are returned.
    Hand((row, col), Board.init_board())
    return row, col
def put_and_reverse(hand: Union[Hand, Tuple[int, int]], board: Board) -> Board:
    """Play ``hand`` on ``board`` and return the resulting position.

    The stones are written to a copy of ``board.board``, so the array of the
    board passed in is left untouched. The returned ``Board`` is built with
    ``not board.side``.

    Args:
        hand: the move, either a ``Hand`` or a ``(row, col)`` tuple.
        board: the position to play on.

    Returns:
        A new ``Board`` holding the updated copy of the array.

    Raises:
        OthelloRuntimeException: if ``is_valid_hand(hand, board)`` is falsy.
    """
    if not is_valid_hand(hand, board):
        raise OthelloRuntimeException("invalid hand: {} {}".format(
            hand, board))
    new_board = Board(board.board.copy(), not board.side)
    # NOTE(review): _unwrap is defined elsewhere; presumably it yields the
    # (row, col) point, the mover's stone value and the raw array -- confirm.
    hand, side_num, board = _unwrap(hand, board)
    # From here on `board` is the copied array owned by `new_board`.
    board = new_board.board
    for slide in _SLIDES:
        # Walk away from the played cell along this direction.
        next_point = (hand[0] + slide[0], hand[1] + slide[1])
        while _is_on_board(next_point):
            if board[next_point[0]][next_point[1]] == 0:
                # Empty cell: this line cannot be enclosed. The sentinel
                # (-1, -1) is meant to fail the on-board check below.
                next_point = (-1, -1)
                break
            if board[next_point[0]][next_point[1]] == side_num:
                # Reached one of the mover's stones: end of the run to flip.
                break
            next_point = (next_point[0] + slide[0], next_point[1] + slide[1])
        if _is_on_board(next_point):
            # Walk back towards the played cell, writing the mover's stone on
            # every cell stepped onto (the played cell itself included).
            while next_point != hand:
                next_point = (next_point[0] - slide[0], next_point[1] - slide[1])
                board[next_point[0]][next_point[1]] = side_num
    board[hand[0]][hand[1]] = side_num
    return new_board
def main(sente=True):
    """Run an interactive console game between a human and ``MiniMaxAI``.

    The board is printed after every move and the stone counts are printed
    once neither side can move.

    Args:
        sente: combined with the side to move as ``board.side ^ sente``; the
            human is prompted whenever that expression is truthy.
    """
    # NOTE(review): this block was reconstructed from a single collapsed line;
    # the nesting of the statements after `ai.put(...)` should be confirmed
    # against the original layout.
    board = Board.init_board()
    ai = MiniMaxAI(5)
    print(view_board(board))
    hands = []
    while True:
        if not extract_valid_hand(board):
            # No legal move for the side to move: record a pass and hand the
            # turn over. If the other side is stuck too, the game is over.
            board.side ^= True
            hands.append(Hand.pass_hand())
            if not extract_valid_hand(board):
                break
        if board.side ^ sente:
            hand = input_hand(board)
        else:
            hand = ai.put(board, hands)
            hands.append(hand)
            print("AI put: {}".format(hand))
            if (board.board == 0).sum() < 12:
                # Few empty cells remain, so there is computation time to
                # spare: search deeper from now on.
                ai.depth = 8
        board = put_and_reverse(hand, board)
        print(view_board(board))
    print("=" * 10 + " GAME OVER " + "=" * 10)
    x_count = (board.board == 1).sum()
    o_count = (board.board == -1).sum()
    print("x: {}, o: {}".format(x_count, o_count))
def is_finished(board: Board):
    """Return ``True`` when neither side has a move left on ``board``.

    The side to move is checked first; only if it has no valid hand is the
    same array checked again with the side flipped.
    """
    if extract_valid_hand(board):
        return False
    flipped = Board(board.board, not board.side)
    return not extract_valid_hand(flipped)
def play(network, ai_param=None):
    """Play one self-play game between two ``AlphaZero`` agents.

    Args:
        network: passed as the first argument to both ``AlphaZero`` agents.
        ai_param: extra keyword arguments for ``AlphaZero``; defaults to
            ``dict(play_count=100)`` when ``None``.

    Returns:
        The concatenation of both agents' ``history`` lists. Every record is
        updated in place: the last element of each ``ai1`` record is set to
        ``judge_simple(board)``; for each ``ai2`` record the first element is
        negated and the last element is set to the negated result.
    """
    if ai_param is None:
        ai_param = dict(play_count=100)
    board = Board.init_board()
    ai1 = AlphaZero(network, history=True, **ai_param)
    ai2 = AlphaZero(network, history=True, **ai_param)
    hands = []
    while True:
        if not extract_valid_hand(board):
            # The side to move has to pass; stop once both sides are stuck.
            board.side ^= True
            hands.append(Hand.pass_hand())
            if not extract_valid_hand(board):
                break
        # ai1 moves whenever board.side is truthy, ai2 otherwise.
        mover = ai1 if board.side else ai2
        hand = mover.put(board, hands)
        hands.append(hand)
        board = put_and_reverse(hand, board)

    result = judge_simple(board)
    history1 = ai1.history if isinstance(ai1, AlphaZero) else []
    # Each agent's history is guarded by its own type check (the original
    # tested ai1 here as well, a copy-paste slip).
    history2 = ai2.history if isinstance(ai2, AlphaZero) else []
    for record in history1:
        record[-1] = result
    for record in history2:
        # ai2's records are sign-flipped, in both the first element and the
        # outcome.
        record[0] = record[0] * -1
        record[-1] = -result
    return history1 + history2
def search_alpha_beta(
        self, board: Board, calc_score: Callable[[Board], float], depth: int = 8,
        alpha=-float("inf"), beta=float("inf")
) -> Tuple[List[Hand], float]:
    """Negamax search with alpha-beta pruning.

    Args:
        board: position to search from.
        calc_score: evaluation used when ``depth`` reaches 0; its value is
            multiplied by ``board.side * 2 - 1``.
        depth: remaining plies to search.
        alpha: lower bound of the search window.
        beta: upper bound of the search window.

    Returns:
        ``(hands, score)``: the best line found from this position and its
        score. ``hands`` is empty at leaves and when no move raised ``alpha``.
    """
    side_sign = board.side * 2 - 1
    if is_finished(board):
        return [], WIN_SCORE * judge_simple(board) * side_sign
    if depth == 0:
        return [], calc_score(board) * side_sign

    principal_line = []
    for move in self._extract_valid_hand(board):
        if move.is_pass_hand:
            # A pass keeps the stones and only hands the turn over.
            child = Board(board.board, not board.side)
        else:
            child = self._put_and_reverse(move, board)
        reply_line, reply_score = self.search_alpha_beta(
            child, calc_score, depth - 1, -beta, -alpha)
        # The child's score is from the opponent's viewpoint.
        value = -reply_score
        if value > alpha:
            principal_line = [move] + reply_line
            alpha = value
        if alpha >= beta:
            # Window closed: stop examining further moves.
            return principal_line, alpha
    return principal_line, alpha
def main(sente=True):
    """Run an interactive console game between a human and ``MonteCarloAI``.

    Args:
        sente: assigned to ``board.side`` before the first move; the human is
            prompted whenever ``board.side`` is truthy.
    """
    board = Board.init_board()
    board.side = sente
    ai = MonteCarloAI()
    print(view_board(board))

    while True:
        if not extract_valid_hand(board):
            # Forced pass; the game ends when the other side is stuck too.
            board.side ^= True
            if not extract_valid_hand(board):
                break

        if board.side:
            hand = input_hand(board)
        else:
            empty_cells = (board.board == 0).sum()
            # With few empty cells left there is time to search exhaustively.
            choose = ai.put_exhaustive_search if empty_cells < 8 else ai.put
            hand = choose(board)
            print("AI put: {}".format(hand))

        board = put_and_reverse(hand, board)
        print(view_board(board))

    rule = "=" * 10
    print(rule + " GAME OVER " + rule)
    stones = board.board
    x_count = (stones == 1).sum()
    o_count = (stones == -1).sum()
    print("x: {}, o: {}".format(x_count, o_count))
def evaluate(ai1, ai2, sente=True, is_view=False):
    """Play one game between ``ai1`` and ``ai2`` and return the signed result.

    Args:
        ai1: agent that moves whenever the truthiness of ``board.side``
            matches the truthiness of ``sente``.
        ai2: agent that moves otherwise.
        sente: selects which side ``ai1`` plays; only its truthiness matters.
        is_view: when truthy, print the board after every move and the final
            stone counts.

    Returns:
        ``judge_simple(board) * (1 if sente else -1)`` for the final board.
    """
    board = Board.init_board()
    if is_view:
        print(view_board(board))
    hands = []
    while True:
        if not extract_valid_hand(board):
            # The side to move must pass; stop once both sides are stuck.
            board.side ^= True
            hands.append(Hand.pass_hand())
            if not extract_valid_hand(board):
                break
        # Compare by truth value rather than identity: `is` silently sends
        # every move to ai2 when `sente` is a truthy non-bool such as 1, even
        # though the return value below already treats it by truthiness.
        if bool(board.side) == bool(sente):
            hand = ai1.put(board, hands)
        else:
            hand = ai2.put(board, hands)
        hands.append(hand)
        board = put_and_reverse(hand, board)
        if is_view:
            print(view_board(board))
    if is_view:
        print("=" * 10 + " GAME OVER " + "=" * 10)
        x_count = (board.board == 1).sum()
        o_count = (board.board == -1).sum()
        print("x: {}, o: {}".format(x_count, o_count))
    return judge_simple(board) * (1 if sente else -1)
def _expand(self, node: Node) -> List[Node]:
    """Create one child node per valid hand, attach them to ``node`` and return them.

    A pass hand produces a child that shares the parent's array with the
    side flipped; any other hand produces the board returned by
    ``self._put_and_reverse``.
    """
    children = []
    for hand in self._extract_valid_hand(node.board):
        if hand.is_pass_hand:
            child_board = Board(node.board.board, not node.board.side)
        else:
            child_board = self._put_and_reverse(hand, node.board)
        children.append(Node(child_board, node, hand=hand))
    node.children = children
    return children
def play_out(board: Board) -> float:
    """Finish the game from ``board`` with uniformly random moves.

    Returns:
        ``judge_simple`` of the final board, multiplied by 1 when the
        starting board's ``side`` is truthy and by -1 otherwise.
    """
    origin = board
    current = board
    while not is_finished(current):
        candidates = extract_valid_hand(current)
        if candidates:
            picked = np.random.choice(candidates)
            current = put_and_reverse(picked, current)
        else:
            # Nothing to play: pass by flipping the side on the same array.
            current = Board(current.board, not current.side)
    sign = 1 if origin.side else -1
    return judge_simple(current) * sign
def test_score(self):
    """A depth-1 search must pick (4, 5) when only that cell is rewarded."""

    def check(board: Board):
        def cell_score(b: Board):
            # Only the stone at (4, 5) contributes to the evaluation.
            return float(b.board[4][5] * 100)

        line, value = self.searcher.search_mini_max(board, cell_score, 1)
        points = [hand.hand for hand in line]
        self.assertEqual(value, 100)
        self.assertEqual(points, [(4, 5)])

    self._test(Board.init_board(), check)
def test_cant_put(self):
    """With (3, 3) set to 1, a depth-3 search must report ``WIN_SCORE`` after one move."""

    def check(board):
        def flat_score(_: Board):
            return 0

        line, value = self.searcher.search_mini_max(board, flat_score, 3)
        points = [hand.hand for hand in line]
        self.assertEqual(value, WIN_SCORE)
        # Any of these single-move lines is accepted.
        accepted = [[(4, 5)], [(5, 4)], [(5, 5)]]
        self.assertIn(points, accepted)

    start = Board.init_board()
    start.board[3][3] = 1
    self._test(start, check)
def test_random(self):
    """Alpha-beta must agree with plain minimax under random evaluation weights."""

    def check(board):
        for _ in range(5):
            weights = np.random.normal(size=(8, 8))

            def weighted_sum(b: Board, _weights=weights):
                return float((b.board * _weights).sum())

            want_line, want_value = self.searcher.search_mini_max(
                board, weighted_sum, 4)
            got_line, got_value = self.searcher.search_alpha_beta(
                board, weighted_sum, 4)
            got_points = [hand.hand for hand in got_line]
            want_points = [hand.hand for hand in want_line]
            self.assertEqual(want_value, got_value)
            self.assertEqual(want_points, got_points)

    self._test(Board.init_board(), check)
def search_mini_max(self, board: Board, calc_score: Callable[[Board], float], depth: int = 8) -> \
        Tuple[List[Hand], float]:
    """Exhaustive negamax search without pruning.

    Args:
        board: position to search from.
        calc_score: evaluation used when ``depth`` reaches 0; its value is
            multiplied by ``board.side * 2 - 1``.
        depth: remaining plies to search.

    Returns:
        ``(hands, score)`` for the best line found. If the valid-hand list is
        empty on an unfinished board the result is ``(None, -inf)``.
    """
    side_sign = board.side * 2 - 1
    if is_finished(board):
        return [], WIN_SCORE * judge_simple(board) * side_sign
    if depth == 0:
        return [], calc_score(board) * side_sign

    top_line = None
    top_value = -float("inf")
    for move in self._extract_valid_hand(board):
        if move.is_pass_hand:
            # A pass keeps the stones and only hands the turn over.
            child = Board(board.board, not board.side)
        else:
            child = self._put_and_reverse(move, board)
        reply_line, reply_value = self.search_mini_max(child, calc_score, depth - 1)
        # The child's score is from the opponent's viewpoint.
        value = -reply_value
        if value > top_value:
            top_line = [move] + reply_line
            top_value = value
    return top_line, top_value
def test_select_early_win(self):
    """The search must return the single move (2, 2) with ``WIN_SCORE``."""

    def check(board):
        def flat_score(_: Board):
            return 0

        line, value = self.searcher.search_mini_max(board, flat_score, 3)
        self.assertEqual(value, WIN_SCORE)
        points = [hand.hand for hand in line]
        self.assertEqual(points, [(2, 2)])

    cells = np.zeros((8, 8))
    for row, col in ((0, 2), (1, 1), (2, 0)):
        cells[row][col] = 1
    for row, col in ((2, 1), (1, 2)):
        cells[row][col] = -1
    self._test(Board(cells), check)
def test_depth2(self):
    """A depth-2 search must return the line (4, 5), (5, 5) scoring 950."""
    cell_weights = (
        ((3, 2), 500),
        ((4, 5), 1000),
        ((5, 5), 50),
        ((6, 5), 10),
        ((3, 5), 45),
        ((2, 6), 1),
    )

    def check(board):
        def weighted(b: Board):
            table = np.zeros((8, 8))
            for (row, col), weight in cell_weights:
                table[row][col] = weight
            return (b.board * table).sum()

        line, value = self.searcher.search_mini_max(board, weighted, 2)
        points = [hand.hand for hand in line]
        self.assertEqual(value, 950)
        self.assertEqual(points, [(4, 5), (5, 5)])

    self._test(Board.init_board(), check)
def test_pass(self):
    """The second hand of the winning line must be a pass."""

    def check(board):
        def flat_score(_: Board):
            return 0

        line, value = self.searcher.search_mini_max(board, flat_score, 5)
        self.assertEqual(value, WIN_SCORE)
        self.assertTrue(line[1].is_pass_hand)
        points = [hand.hand for hand in line]
        # Either capture order is accepted.
        accepted = [
            [(0, 2), (0, 0), (7, 2)],
            [(7, 2), (0, 0), (0, 2)],
        ]
        self.assertIn(points, accepted)

    start = Board(np.zeros((8, 8)))
    for row in (0, 7):
        start.board[row][0] = 1
        start.board[row][1] = -1
    self._test(start, check)
def test_index_array_of_bound(self):
    """``is_valid_hand`` must return falsy, without raising, on a board of ones with one hole."""
    full = Board(np.ones((8, 8)), False)
    full.board[1][1] = 0

    # The point of this test is that the call completes without an error.
    verdict = is_valid_hand((1, 1), full)

    self.assertFalse(verdict)
def test_even(self):
    """``judge`` must return 0 for a board with equally many 1 and -1 cells."""
    # Four repetitions of a row of 1s followed by a row of -1s.
    even = Board(np.array([[1] * 8, [-1] * 8] * 4))
    even_result = judge(even)
    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(0, even_result)
def test_jump(self):
    """(4, 6) must be rejected on the initial board once (4, 5) is set to 1."""
    start = Board.init_board()
    start.board[4][5] = 1
    self.assertFalse(is_valid_hand((4, 6), start))
def test_diagonal(self):
    """(5, 5) must be accepted on the initial board once (3, 3) is set to 1."""
    start = Board.init_board()
    start.board[3][3] = 1
    self.assertTrue(is_valid_hand((5, 5), start))
def test_finish(self):
    """An all-zero board must be reported as finished."""
    empty = Board(np.zeros((8, 8)))
    self.assertTrue(is_finished(empty))
def test_able_x(self):
    """A board with 1 at (0, 0) and -1 at (0, 1) must not be reported as finished."""
    position = Board(np.zeros((8, 8)), False)
    position.board[0][0] = 1
    position.board[0][1] = -1
    self.assertFalse(is_finished(position))
x = self.conv(x) x = self.resnet(x) x = F.max_pool2d(x, kernel_size=x.size()[2:]) x = torch.squeeze(x, -1) x = torch.squeeze(x, -1) p = self.p_clf(x) v = self.v_clf(x) p = F.softmax(p, dim=-1) v = torch.tanh(v) return p, v if __name__ == '__main__': # test from othello.model import Board from othello.helper import put_and_reverse input = Board.init_board() # input = put_and_reverse((2, 3), input) x = np.array([[input.board == 1, input.board == -1]], dtype=np.float32) network = Network() network.load_state_dict(torch.load("model/latest.pth")) p, v = network(x) p = p.to("cpu").detach().numpy().copy() v = v.to("cpu").detach().numpy().copy() print(sorted(p[-1])[::-1]) p = p[:, :-1].reshape((8, 8)) np.set_printoptions(precision=1) [print(p[i, :]) for i in range(8)] print(v)
def test_invalid_shape(self):
    """Building a ``Board`` from a 10-row array must raise ``IllegalShapeException``."""
    too_tall = np.array([[1] * 8, [-1] * 8] * 5)
    with self.assertRaises(IllegalShapeException):
        Board(too_tall)
def test_invalid_shape(self):
    """Building a ``Hand`` at (0, -1) must raise ``IllegalIndexException``."""
    start = Board.init_board()
    off_board = (0, -1)
    with self.assertRaises(IllegalIndexException):
        Hand(off_board, start)