def update_turn(board, player, node, network, pos):
    """Advance the search tree after an opponent move.

    Takes the board, the player number, the current node, the player's
    network and the opponent's move, and returns the child node that
    corresponds to that move, expanding it first if it is still a leaf.
    """
    # Flat occupancy map: non-zero wherever either player has a stone.
    occupancy = (board[:, :, 0] + board[:, :, 1]).flatten()
    # Children are indexed by the number of empty cells that precede the
    # move's flat index (pos presumably is (x, y) -> y * 19 + x — confirm
    # against the caller's coordinate convention).
    flat_index = pos[1] * 19 + pos[0]
    empties_before = np.sum(occupancy[:flat_index] == 0)
    child = node.get_child(empties_before)
    # Lazily expand branches the search never visited.
    if child.leaf():
        expand(child, board, player, network)
    return child
def init_game(network_1, network_2):
    """Initialise a game.

    Builds the starting board and one expanded root node per player,
    returning (board, root_player_1, root_player_2).
    """
    board = init_map()  # board = np.zeros((19, 19, 3), np.int8)
    roots = []
    # Player ids are 0-based: 0 -> network_1, 1 -> network_2.
    for player_id, net in enumerate((network_1, network_2)):
        root = Node(0)
        expand(root, board, player_id, net)
        roots.append(root)
    return board, roots[0], roots[1]
def test_winning_move_when_one():
    """Test whether the network finds a winning move when one exists.

    Generates random boards until one contains at least one immediate
    winning move, lets the network play a turn for the player who can win,
    then checks whether a winning position is now on the board.

    Returns True if the winning move was found, False otherwise.
    """
    while True:
        has_winning_move = 0
        tmp = np.random.randint(3, size=(19, 19))
        board = np.zeros((1, 19, 19, 3), dtype=int)
        for y in range(19):
            for x in range(19):
                if tmp[y, x] == 1:
                    board[0, y, x, 0] = 1
                    if evaluate(board, 0, [y, x]):
                        # Placing here wins: remove the stone so the move
                        # stays open, and remember which player can win.
                        board[0, y, x, 0] = 0
                        # BUGFIX: the original tested `if has_winning_move`,
                        # which can never be true on first detection (it
                        # starts at 0), so no winning move was ever recorded
                        # and the outer loop never terminated. Record the
                        # first winning player found instead.
                        if not has_winning_move:
                            has_winning_move = 1
                elif tmp[y, x] == 2:
                    board[0, y, x, 1] = 1
                    if evaluate(board, 1, [y, x]):
                        board[0, y, x, 1] = 0
                        if not has_winning_move:
                            has_winning_move = 2
        if not has_winning_move:
            continue
        network = Network(0)
        node = Node(0)
        # has_winning_move is 1 or 2; player ids are 0-based.
        expand(node, board, has_winning_move - 1, network)
        turn(board, has_winning_move - 1, node, network)
        # If the played move completed five in a row, evaluate now reports
        # a win somewhere on the board — presumably at the played cell.
        for y in range(19):
            for x in range(19):
                if evaluate(board, has_winning_move - 1, [y, x]):
                    return True
        return False
def basic_win():
    """ test with simple env

    Smoke-test: runs one MCTS search on a hand-crafted position and prints
    the raw network policy followed by the MCTS-refined policy for visual
    inspection. Always returns 1.
    """
    # Hand-written raw board: 0 = empty, 1 = player-1 stone, 2 = player-2
    # stone. Player 1 has four in a row on row 6 (an open winning threat).
    e = np.array([[0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                  [2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
    board = conv_map(e)
    player = 1  # 0-based player id, i.e. player 2's turn
    root = Node(0)
    # NOTE(review): version -1 presumably selects a default/untrained
    # network — confirm against Network's constructor.
    net = Network(-1)
    expand(root, board, player, net)
    # Raw network policy, before any search.
    p, _ = net.infer(board)
    print_policy(board, p[0])
    # Policy after MCTS refinement.
    pos, n_board, p, p_node, status = mcts(board, player, root, net)
    print_policy(board, p)
    #root.debug()
    #print (pos)
    #print(evaluate(board, player, pos))
    return 1
required=True) parser.add_argument('-v', help='Network version', required=True) parser.add_argument('-d', help='1 or 0, Outputs network policy, and mcts policy', required=True) args = vars(parser.parse_args()) player = int(args["s"]) - 1 version = int(args["v"]) debug = int(args["d"]) board = np.zeros((19, 19, 3), np.int8) node = Node(0) net = Network(version) expand(node, board, player, net) if player == 0: node, _ = human_turn(board, node, player, net) # game r = 0 while (not r): # ia if debug: p, _ = net.infer(board) _, board, p_, node, r = mcts(board, player ^ 1, node, net) if debug: print("Network policy:")
def play_turn(board, player, network):
    """Run one MCTS search for `player` and print the chosen move as "x,y"."""
    print("DEBUG", "Calculating the next move")
    root = Node(0)
    expand(root, board, player, network)
    # Only the selected position is needed here; discard the rest of the
    # mcts return tuple (board, policy, node, status).
    (move_x, move_y), _, _, _, _ = mcts(board, player, root, network)
    print("%d,%d" % (int(move_x), int(move_y)))