Example 1
    def playout(self, state: Board):
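        """Run a single MCTS playout from the root of the search tree.

        The tree is descended with select(); leaf nodes are expanded while
        the depth/visit thresholds allow it.  The reached position is then
        scored with rollout_simulation() and the value is backpropagated.
        ``state`` is played on in place, so callers pass a copy.
        """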
        node = self.root
        start_depth = self.root.depth

        while True:
            if node.is_leaf():
                # if node.depth < 5 or node.depth < 2 + start_depth or node.visit >= self.expand_bound:
                if (node.depth < start_depth + 5 and node.visit > (node.depth - start_depth) * 2) \
                        or node.visit >= self.expand_threshold:
                    if self.use_network:
                        policy = expand_policy_network(state)
                    else:
                        policy = expand_policy_random(state)
                    node.expand(policy)
                else:
                    break
            action, node = node.select()
            state.play(action)

            is_end, _ = state.check_winner()

            if is_end:
                break

        # bp_value = self.evaluate_rollout(state)
        # bp_value = self.evaluate_rollout_v2(state)
        # for _ in range(self.expand_bound):
        #     bp_value = self.rollout_simulation(state.copy())
        #     node.backpropagate(bp_value)

        bp_value = self.rollout_simulation(state)
        node.backpropagate(bp_value)
Example 2
    def get_action(self, state: Board) -> int:
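        """Block on the GUI move queue until a valid (x, y) click arrives.

        A non-tuple item on the queue (e.g. None when the window is closed)
        fails to unpack with TypeError, which is treated as the GUI being
        gone and terminates the program.
        """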
        # last = state.last_move
        # if last != -1:
        #     last_x, last_y = move_int2xy(last)
        #     self.queue_draw.put((last_x, last_y, state.last_player))

        get_input = True
        move_int = -1
        while get_input:
            # print('get')
            try:
                x, y = self.queue_move.get()
            except TypeError:
                print('\n GUI window not found\n exit program\n')
                time.sleep(1)
                sys.exit()
            # print((x, y))
            move_int = move_xy2int(x, y)
            if state.check_valid(move_int):
                get_input = False
                # print('valid')

        # self.queue_draw.put((x, y, state.current_player))

        return move_int
Example 3
    def get_move(self, state: Board):
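        """Search the current position and return the most visited move.

        An empty board is answered with the centre stone.  Otherwise
        ``compute_budget`` playouts are run on copies of the board (halved
        for the first reply or two), the five most visited children are
        logged, and the action of the most visited child is returned.
        """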
        if state.is_empty():
            return move_xy2int(WIDTH // 2, HEIGHT // 2)

        time.sleep(0.1)
        start_time = time.time()

        if len(state.moved) in (1, 2):
            it = self.compute_budget // 2
        else:
            it = self.compute_budget

        for i in tqdm(range(it)):
            board_to_search = state.copy()
            self.playout(board_to_search)

        end_time = time.time()
        children = self.root.children.items()

        Log.silent_log('%d playouts in %.3f seconds' %
                       (it, end_time - start_time))
        Log.silent_log('average : %.3f ms\n' %
                       ((end_time - start_time) / it * 1000))
        Log.silent_log('most visited node:')
        Log.silent_log('|  ' + 'action'.ljust(8, ' ') + '|  ' +
                       'visit'.ljust(8, ' ') + '|  ' +
                       'probability'.ljust(13, ' ') + '|  ' + '   Q      |')
        for action, c in sorted(children,
                                key=lambda child: child[1].visit,
                                reverse=True)[:5]:
            if c.visit == 0 and c.probability < 0.01:
                continue
            Log.silent_log('|  ' + ' ' + str(move_int2cord(action)).ljust(7, ' ') + '|  ' + \
                           (' %d' % c.visit).ljust(8, ' ') + '|  ' + \
                           (' %.3f%%' % (c.probability * 100)).ljust(13, ' ') + '|  ' + \
                           ('%.4f' % c.Q).rjust(7, ' ') + '   |')
        most_visited_move = max(children, key=lambda child: child[1].visit)[0]
        # print('mean:', sum(t) / len(t), 'ms')
        # print('acc:', sum(check) / len(check) * 100, '%')
        # print('q:', sum(q_log) / len(q_log))

        return most_visited_move
Example 4
def simulate_random(board: Board, limit=200):
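    """Play the position out with the random rollout policy.

    Forced moves reported by check_must() are played first; otherwise the
    highest-scoring cell of the random policy is played, for at most
    ``limit`` plies.  Returns the winner if the game ended, else 0.5.
    """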
    net_run_time = 0.
    start_time = time.time() * 1000

    is_end, winner = False, None

    for i in range(limit):
        is_end, winner = board.check_winner()
        if is_end:
            # print(i, 'end')
            break

        must = board.check_must()
        if must is not None:
            # if board.play(must):
            #     continue
            if not board.play(must):
                raise ValueError('Must Error')
            # board.show()
            continue

        t1 = time.time() * 1000
        action_prob = rollout_policy_random(board)
        # action_prob = policy(board)
        t2 = time.time() * 1000
        net_run_time += t2 - t1

        next_action = np.argmax(action_prob)

        while not board.play(next_action):
            # mask the rejected cell so argmax cannot return it again
            action_prob[next_action] = -1
            next_action = np.argmax(action_prob)

    end_time = time.time() * 1000

    # print('%.5fms  %.5fms  %.5f%%' % (end_time - start_time, net_run_time, net_run_time * 100 / (end_time - start_time)))
    if is_end and winner is not None:
        return winner

    return 0.5
Example 5
def expand_policy_random(board: Board):
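    """Expansion prior used when no network is available.

    Probabilities are proportional to the narrow availability convolution,
    so only empty cells close to existing stones receive mass.
    """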
    board_array = board.get_board_array()
    conv_available = _convolve_board_available_narrow(board_array)
    cnt = conv_available.sum()
    probability = conv_available / cnt

    return_list = []
    for move in board.available:
        if conv_available[move]:
            return_list.append((move, probability[move]))

    return return_list
Example 6
    def __init__(self,
                 player_black: Union[AgentCLI, AgentGUI, DeepMCTSAgent,
                                     PureMCTSAgent],
                 player_white: Union[AgentCLI, AgentGUI, DeepMCTSAgent,
                                     PureMCTSAgent],
                 queue_draw=None):
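        """Store the board, the two players keyed by colour, the draw
        queue, the GUI flag and per-colour timing lists."""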
        self.board = Board()

        self.black = player_black
        self.white = player_white

        self.player = {BLACK_: self.black, WHITE_: self.white}

        self.queue_draw = queue_draw
        self.playing = False
        if self.black.gui or self.white.gui:
            self.gui = True
        else:
            self.gui = False

        self._time = {
            BLACK_: [],
            WHITE_: [],
        }
Example 7
def simulate_network(board: Board, limit=100, q_confidence=0.5):
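    """Network-guided playout used to evaluate a leaf position.

    Forced moves are played directly.  For the first ``random_bound`` plies
    the move is sampled from the rollout policy network; afterwards the
    most probable legal move is played.  If the game ends the winner is
    returned, otherwise the value network's estimate is shrunk towards 0.5
    by ``q_confidence``.
    """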
    random_bound = 4
    net_run_time = 0.
    start_time = time.time() * 1000

    is_end, winner = False, None

    for i in range(limit):
        is_end, winner = board.check_winner()
        if is_end:
            # print(i, 'end')
            break

        must = board.check_must()
        if must is not None:
            if not board.play(must):
                raise ValueError('Must Error')
            continue

        t1 = time.time() * 1000
        action_prob = rollout_policy_network(board)
        # action_prob = policy(board)
        t2 = time.time() * 1000
        net_run_time += t2 - t1

        if i < random_bound:
            next_action = np.random.choice(15 * 15, 1, p=action_prob)[0]

            while not board.play(next_action):
                next_action = np.random.choice(15 * 15, 1, p=action_prob)[0]

        else:
            next_action = max(board.available,
                              key=lambda move: action_prob[move])
            board.play(next_action)

    end_time = time.time() * 1000

    # print('%.5fms  %.5fms  %.5f%%' % (end_time - start_time, net_run_time, net_run_time * 100 / (end_time - start_time)))
    if is_end and winner is not None:
        return winner
    value_network_Q = ValueRunner(board.get_board_array())
    if board.current_player == BLACK_:  # array shape (white, black)
        value_network_Q = 1 - value_network_Q  # change to black side
    return q_confidence * (value_network_Q - 0.5) + 0.5
Example 8
def expand_policy_network(board: Board) -> List[Tuple[int, float]]:
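    """Expansion priors taken from the tree policy network.

    Candidate moves are restricted, in order, to the second-move book after
    a centre opening, any forced ("must") moves, and otherwise the empty
    cells selected by the wide availability convolution; each candidate is
    paired with its network probability.
    """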
    board_array = board.get_board_array()
    probability = TreePolicyRunner(board_array)

    if len(board.moved) == 1:
        if board.moved[0] == move_xy2int(7, 7):
            return list(map(lambda move: (move, probability[move]), __second_move_available))
    if len(board.must[board.current_player]):
        return list(map(lambda move: (move, probability[move]), (board.must[board.current_player])))
    if len(board.must[0] | board.must[1]):
        return list(map(lambda move: (move, probability[move]), (board.must[0] | board.must[1])))

    conv_available = _convolve_board_available_wide(board_array)
    return_list = []

    for move in board.available:
        if conv_available[move]:
            return_list.append((move, probability[move]))

    return return_list
Example 9
    def get_action(self, state: Board) -> int:
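        """Prompt for a coordinate such as ``h8`` and return it as a move
        index, re-prompting on malformed, out-of-range or occupied input."""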
        while True:
            raw = input('action to move : ').lower().strip().replace(' ', '')
            try:
                x = ord(raw[0]) - 97
                y = int(raw[1:]) - 1
            except (ValueError, IndexError):
                print('invalid input format', raw)
                continue
            if x >= WIDTH or y >= HEIGHT or x < 0 or y < 0:
                print('invalid input range:', (x, y))
                continue

            move = move_xy2int(x, y)

            if state.check_valid(move):
                return move
            else:
                print('invalid action:', (x, y))
                continue
Example 10
def rollout_policy_network(board: Board):
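    """Move probabilities from the rollout policy network for the current board."""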
    probability = RolloutPolicyRunner(board.get_board_array())

    return probability
Example 11
def rollout_policy_random(board: Board):
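    """Random move scores, masked to empty cells near existing stones."""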
    board_array = board.get_board_array()
    conv_available = _convolve_board_available_narrow(board_array)
    probability = np.random.rand(15 * 15) * conv_available

    return probability
Example 12
class Server:
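    """Runs a game between two agents, handling move logging, timing and
    optional GUI redraws."""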
    def __init__(self,
                 player_black: Union[AgentCLI, AgentGUI, DeepMCTSAgent,
                                     PureMCTSAgent],
                 player_white: Union[AgentCLI, AgentGUI, DeepMCTSAgent,
                                     PureMCTSAgent],
                 queue_draw=None):
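        """Store the board, the two players keyed by colour, the draw
        queue, the GUI flag and per-colour timing lists."""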
        self.board = Board()

        self.black = player_black
        self.white = player_white

        self.player = {BLACK_: self.black, WHITE_: self.white}

        self.queue_draw = queue_draw
        self.playing = False
        if self.black.gui or self.white.gui:
            self.gui = True
        else:
            self.gui = False

        self._time = {
            BLACK_: [],
            WHITE_: [],
        }

    @property
    def current_player(self):
        return self.player[self.board.current_player]

    def run(self, log_path=None, time_log_path=None):
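        """Main game loop.

        Repeatedly asks the player to move for an action, plays it, redraws
        the board (and the GUI, if attached) and records the elapsed time,
        until a winner or a draw is found.  Moves and the result are
        appended to ``log_path``, timing statistics to ``time_log_path``,
        when those paths are given.
        """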

        self.playing = True
        winner = None
        self.board.show()
        sys.stdout.flush()
        while self.playing:

            t1 = time.time()
            action = self.current_player.get_action(self.board)
            t2 = time.time()
            self._time[self.board.current_player].append(t2 - t1)

            if log_path is not None:
                with open(log_path, 'a') as f:
                    f.write(str(move_int2xy(action)) + '\n')

            self.board.play(action)

            os.system('cls')
            self.board.show()
            Log.flush()
            sys.stdout.flush()
            if self.gui:
                self.queue_draw.put(
                    (*move_int2xy(action), self.board.last_player))

            is_end, winner = self.board.check_winner()
            if is_end:
                self.playing = False

        if winner is None:
            print(' Game Draw')
        elif winner == BLACK_:
            print(' Black win')
        elif winner == WHITE_:
            print(' White win')

        if log_path is not None:
            with open(log_path, 'a') as f:
                if winner is None:
                    f.write('Game Draw\n')
                elif winner == BLACK_:
                    f.write('Black win\n')
                elif winner == WHITE_:
                    f.write('White win\n')
        if time_log_path is not None:
            if type(self.black) == DeepMCTSAgent:
                black_type = 'Deep MCTS Agent with compute budget %d' % self.black.compute_budget
            elif type(self.black) == PureMCTSAgent:
                black_type = 'Pure MCTS Agent with compute budget %d' % self.black.compute_budget
            else:
                black_type = 'Human Player'

            if type(self.white) == DeepMCTSAgent:
                white_type = 'Deep MCTS Agent with compute budget %d' % self.white.compute_budget
            elif type(self.white) == PureMCTSAgent:
                white_type = 'Pure MCTS Agent with compute budget %d' % self.white.compute_budget
            else:
                white_type = 'Human Player'

            with open(time_log_path, 'a') as f:
                f.write('Black player: ' + black_type + '\n')
                for black_time in self._time[BLACK_][1:]:
                    f.write('%.4f\n' % black_time)
                sum_black, len_black = sum(
                    self._time[BLACK_][1:]), len(self._time[BLACK_]) - 1
                f.write('average: %.4fs per action\n' %
                        (sum_black / len_black))
                if self.black.use_mcts:
                    f.write('         %.4fms per playout\n' %
                            (sum_black * 1000 / len_black /
                             self.black.compute_budget))
                f.write('\n')
                f.write('White player: ' + white_type + '\n')
                for white_time in self._time[WHITE_]:
                    f.write('%.4f\n' % white_time)
                sum_white, len_white = sum(self._time[WHITE_]), len(
                    self._time[WHITE_])
                f.write('average: %.4fs per action\n' %
                        (sum_white / len_white))
                if self.white.use_mcts:
                    f.write('         %.4fms per playout\n' %
                            (sum_white * 1000 / len_white /
                             self.white.compute_budget))