def play(network, ai_param=None):
    """Run one self-play game between two AlphaZero agents and collect their histories.

    Args:
        network: Model handed to both ``AlphaZero`` instances.
        ai_param: Optional keyword arguments forwarded to ``AlphaZero``.
            Defaults to ``dict(play_count=100)`` when omitted.

    Returns:
        The history entries of the first agent followed by those of the
        second agent. The entries are modified in place: the last element of
        each entry is overwritten with the ``judge_simple`` result (negated
        for the second agent), and the first element of every second-agent
        entry is multiplied by -1.
    """
    if ai_param is None:
        ai_param = dict(play_count=100)

    board = Board.init_board()
    ai1 = AlphaZero(network, history=True, **ai_param)
    ai2 = AlphaZero(network, history=True, **ai_param)
    hands = []

    while True:
        if not extract_valid_hand(board):
            # The side to move has no valid hand: record a pass and hand the
            # turn over. If the other side is stuck as well, the game is over.
            board.side ^= True
            hands.append(Hand.pass_hand())
            if not extract_valid_hand(board):
                break

        # ai1 moves whenever board.side is truthy, ai2 otherwise.
        player = ai1 if board.side else ai2
        hand = player.put(board, hands)
        hands.append(hand)
        board = put_and_reverse(hand, board)

    result = judge_simple(board)

    # Each agent is checked against its own type (the second check used to
    # look at ai1 by mistake).
    history1 = ai1.history if isinstance(ai1, AlphaZero) else []
    history2 = ai2.history if isinstance(ai2, AlphaZero) else []

    for entry in history1:
        entry[-1] = result
    for entry in history2:
        # NOTE(review): presumably flips the stored position to the second
        # agent's point of view -- confirm against AlphaZero.history layout.
        entry[0] = entry[0] * -1
        entry[-1] = -result

    return history1 + history2
def main(sente=True):
    """Play an interactive console game against ``MiniMaxAI``.

    The board is printed after every move and the final disc counts are
    printed once neither side has a valid hand left.

    Args:
        sente: The human is prompted for a move whenever
            ``board.side ^ sente`` is truthy; otherwise the AI moves.
    """
    board = Board.init_board()
    ai = MiniMaxAI(5)
    print(view_board(board))
    hands = []

    while True:
        if not extract_valid_hand(board):
            # The side to move has no valid hand: record a pass and switch
            # sides. If the other side cannot move either, the game is over.
            board.side ^= True
            hands.append(Hand.pass_hand())
            if not extract_valid_hand(board):
                break

        if board.side ^ sente:
            hand = input_hand(board)
        else:
            hand = ai.put(board, hands)
            print("AI put: {}".format(hand))
        # Record every move, human or AI, so the history handed to
        # ``ai.put`` describes the whole game.
        hands.append(hand)

        if (board.board == 0).sum() < 12:
            # There is computation time to spare, so search deeper.
            ai.depth = 8

        board = put_and_reverse(hand, board)
        print(view_board(board))

    print("=" * 10 + " GAME OVER " + "=" * 10)
    # Cells equal to 1 are reported as "x", cells equal to -1 as "o".
    x_count = (board.board == 1).sum()
    o_count = (board.board == -1).sum()
    print("x: {}, o: {}".format(x_count, o_count))
def main(sente=True):
    """Play an interactive console game against ``MonteCarloAI``.

    The initial ``board.side`` is set to ``sente``; the human is prompted for
    a move whenever ``board.side`` is truthy and the AI moves otherwise. The
    board is printed after every move and the disc counts at the end.

    Args:
        sente: Value assigned to ``board.side`` before the first move.
    """
    board = Board.init_board()
    board.side = sente
    engine = MonteCarloAI()
    print(view_board(board))

    while True:
        if not extract_valid_hand(board):
            # No valid hand for the side to move: switch sides, and stop
            # when the other side cannot move either.
            board.side ^= True
            if not extract_valid_hand(board):
                break

        if board.side:
            move = input_hand(board)
        else:
            empty_cells = (board.board == 0).sum()
            # There is computation time to spare near the end, so search
            # exhaustively once fewer than 8 cells are empty.
            choose = engine.put_exhaustive_search if empty_cells < 8 else engine.put
            move = choose(board)
            print("AI put: {}".format(move))

        board = put_and_reverse(move, board)
        print(view_board(board))

    rule = "=" * 10
    print(rule + " GAME OVER " + rule)
    # Cells equal to 1 are reported as "x", cells equal to -1 as "o".
    x_total = (board.board == 1).sum()
    o_total = (board.board == -1).sum()
    print("x: {}, o: {}".format(x_total, o_total))
def evaluate(ai1, ai2, sente=True, is_view=False):
    """Play one game between ``ai1`` and ``ai2`` and return the signed result.

    Args:
        ai1: Player asked for a move whenever the truth value of
            ``board.side`` equals the truth value of ``sente``.
        ai2: Player asked for a move otherwise.
        sente: Side (as a truth value) on which ``ai1`` moves.
        is_view: When true, print the board after every move and a summary
            with the disc counts once the game is over.

    Returns:
        ``judge_simple(board)`` for the final board, multiplied by -1 when
        ``sente`` is falsy (presumably so the value is expressed from
        ``ai1``'s point of view -- confirm against ``judge_simple``).
    """
    board = Board.init_board()
    if is_view:
        print(view_board(board))
    hands = []

    while True:
        if not extract_valid_hand(board):
            # The side to move has no valid hand: record a pass and switch
            # sides. If the other side cannot move either, the game is over.
            board.side ^= True
            hands.append(Hand.pass_hand())
            if not extract_valid_hand(board):
                break

        # Compare truth values rather than identity: ``is`` only works when
        # both operands are the very same bool singleton and silently picks
        # ai2 for values such as 1 or numpy.bool_.
        if bool(board.side) == bool(sente):
            hand = ai1.put(board, hands)
        else:
            hand = ai2.put(board, hands)
        hands.append(hand)
        board = put_and_reverse(hand, board)
        if is_view:
            print(view_board(board))

    if is_view:
        print("=" * 10 + " GAME OVER " + "=" * 10)
        # Cells equal to 1 are reported as "x", cells equal to -1 as "o".
        x_count = (board.board == 1).sum()
        o_count = (board.board == -1).sum()
        print("x: {}, o: {}".format(x_count, o_count))

    return judge_simple(board) * (1 if sente else -1)
def _calc_valid_hand_p(self, p: np.ndarray, board: Board) -> Tuple[np.ndarray, List[Hand]]:
    """Restrict a policy vector to the valid hands of ``board`` and rescale it.

    Args:
        p: Policy values. The last entry is dropped and the remainder is
            reshaped to 8x8, so ``p[:-1]`` must hold exactly 64 values.
            (The dropped entry is presumably the pass probability -- confirm.)
        board: Position whose valid hands are looked up.

    Returns:
        A pair ``(probabilities, hands)`` where ``probabilities[i]`` is the
        rescaled value for ``hands[i]``.
    """
    grid = p[:-1].reshape((8, 8))
    hands = extract_valid_hand(board)
    # Value of each valid hand, indexed by the two entries of ``hand.hand``.
    picked = np.array([grid[h.hand[0], h.hand[1]] for h in hands])
    # A tiny epsilon is added to every term so the denominator is never
    # exactly zero when all picked values are zero.
    denominator = sum(picked + 1e-18)
    return picked / denominator, hands
def play_out(board: Board) -> float:
    """Finish the game from ``board`` with random hands and score the outcome.

    Hands are drawn with ``np.random.choice`` from the valid hands of the
    current position; when the side to move has none, a new ``Board`` with
    the same cells and the opposite side is used instead.

    Returns:
        ``judge_simple`` of the final board, multiplied by -1 when the side
        of the board passed in is falsy.
    """
    start = board
    current = board
    while not is_finished(current):
        candidates = extract_valid_hand(current)
        if candidates:
            chosen = np.random.choice(candidates)
            current = put_and_reverse(chosen, current)
        else:
            # Nothing to play: pass by flipping the side to move.
            current = Board(current.board, not current.side)
    outcome = judge_simple(current)
    return outcome * (1 if start.side else -1)
def _extract_valid_hand(self, board: Board):
    """Return the valid hands for ``board``, or ``[Hand.pass_hand()]`` when there are none."""
    return extract_valid_hand(board) or [Hand.pass_hand()]
def put(self, board: Board, hands: List[Hand]) -> Hand:
    """Choose one of the valid hands for ``board`` with ``np.random.choice``.

    Args:
        board: Position to move on.
        hands: Not read by this implementation; its value is ignored.

    Returns:
        The randomly selected hand.
    """
    candidates = extract_valid_hand(board)
    return np.random.choice(candidates)
def test_false_side(self):
    """Check the valid hands on the board created by ``Board.init_board(False)``."""
    board = Board.init_board(False)
    actual = extract_valid_hand(board)
    expected = [(2, 4), (4, 2), (3, 5), (5, 3)]
    # Compare as sets because the order of the returned hands is not checked.
    # assertEqual replaces the assertEquals alias, which was deprecated and
    # removed in Python 3.12.
    self.assertEqual(set(expected), {i.hand for i in actual})