Code example #1
    def playout(self, state: Board):
        """Run one MCTS playout: select down the tree, expand a qualifying
        leaf, then evaluate it with a rollout and backpropagate."""
        node = self.root
        start_depth = self.root.depth

        while True:
            if node.is_leaf():
                # if node.depth < 5 or node.depth < 2 + start_depth or node.visit >= self.expand_bound:
                # Expand a shallow leaf once it has collected a few visits,
                # or any leaf that has reached the expansion threshold.
                if (node.depth < start_depth + 5 and node.visit > (node.depth - start_depth) * 2) \
                        or node.visit >= self.expand_threshold:
                    if self.use_network:
                        policy = expand_policy_network(state)
                    else:
                        policy = expand_policy_random(state)
                    node.expand(policy)
                else:
                    break
            action, node = node.select()
            state.play(action)

            is_end, _ = state.check_winner()

            if is_end:
                break

        # bp_value = self.evaluate_rollout(state)
        # bp_value = self.evaluate_rollout_v2(state)
        # for _ in range(self.expand_bound):
        #     bp_value = self.rollout_simulation(state.copy())
        #     node.backpropagate(bp_value)

        # Evaluate the reached position with a rollout and propagate the
        # result back up the visited path.
        bp_value = self.rollout_simulation(state)
        node.backpropagate(bp_value)
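For context, a playout like this is normally driven many times from the agent's move selection. Below is a minimal sketch of such a driver, assuming a fixed compute_budget (an attribute the Server example further down also uses) and a root whose children map actions to nodes; choose_move and root.children are illustrative names, not taken from the project.

# Illustrative driver; only compute_budget, playout and node.visit appear
# in the surrounding code, the rest of the names are assumptions.
def choose_move(mcts, board):
    for _ in range(mcts.compute_budget):   # run a fixed number of playouts
        mcts.playout(board.copy())         # each playout consumes a copy
    # play the most-visited action at the root
    action, _ = max(mcts.root.children.items(),
                    key=lambda kv: kv[1].visit)
    return action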
Code example #2
File: simulation.py  Project: Thxios/BaekHyunGomokuAI
import time

import numpy as np

# Board, rollout_policy_network, ValueRunner and BLACK_ are project-internal
# names imported from the repository's own modules.


def simulate_network(board: Board, limit=100, q_confidence=0.5):
    random_bound = 4    # number of opening moves sampled stochastically
    net_run_time = 0.   # accumulated time spent inside the policy network
    start_time = time.time() * 1000

    is_end, winner = False, None

    for i in range(limit):
        is_end, winner = board.check_winner()
        if is_end:
            # print(i, 'end')
            break

        # A forced move (immediate win or mandatory block) bypasses the policy.
        must = board.check_must()
        if must is not None:
            if not board.play(must):
                raise ValueError('Must Error')
            continue

        t1 = time.time() * 1000
        action_prob = rollout_policy_network(board)
        # action_prob = policy(board)
        t2 = time.time() * 1000
        net_run_time += t2 - t1

        if i < random_bound:
            # Opening moves: sample from the policy distribution, retrying
            # until the sampled move is legal.
            next_action = np.random.choice(15 * 15, 1, p=action_prob)[0]

            while not board.play(next_action):
                next_action = np.random.choice(15 * 15, 1, p=action_prob)[0]

        else:
            # Later moves: play the highest-probability legal move.
            next_action = max(board.available,
                              key=lambda move: action_prob[move])
            board.play(next_action)

    end_time = time.time() * 1000

    # print('%.5fms  %.5fms  %.5f%%' % (end_time - start_time, net_run_time, net_run_time * 100 / (end_time - start_time)))
    if is_end:
        if winner is not None:
            return winner
    # Unfinished (or drawn) game: fall back to the value network and shrink
    # its estimate toward a neutral 0.5 by q_confidence.
    value_network_Q = ValueRunner(board.get_board_array())
    if board.current_player == BLACK_:  # array shape (white, black)
        value_network_Q = 1 - value_network_Q  # convert to Black's perspective
    return q_confidence * (value_network_Q - 0.5) + 0.5
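The last line blends the value-network estimate toward a neutral 0.5, with q_confidence controlling how much of the network's opinion survives. A quick arithmetic check of that mapping:

# Pure arithmetic check of the confidence scaling above.
def scale_q(q, q_confidence=0.5):
    return q_confidence * (q - 0.5) + 0.5

assert scale_q(1.0) == 0.75   # a certain win is damped to 0.75
assert scale_q(0.0) == 0.25   # a certain loss is damped to 0.25
assert scale_q(0.5) == 0.50   # a neutral estimate stays neutral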
Code example #3
File: simulation.py  Project: Thxios/BaekHyunGomokuAI
def simulate_random(board: Board, limit=200):
    # Same structure as simulate_network above, but with a random rollout
    # policy and a flat 0.5 returned when no winner emerges.
    net_run_time = 0.   # time spent inside the (random) policy call
    start_time = time.time() * 1000

    is_end, winner = False, None

    for i in range(limit):
        is_end, winner = board.check_winner()
        if is_end:
            # print(i, 'end')
            break

        # A forced move bypasses the rollout policy here as well.
        must = board.check_must()
        if must is not None:
            if not board.play(must):
                raise ValueError('Must Error')
            continue

        t1 = time.time() * 1000
        action_prob = rollout_policy_random(board)
        # action_prob = policy(board)
        t2 = time.time() * 1000
        net_run_time += t2 - t1

        next_action = np.argmax(action_prob)

        while not board.play(next_action):
            # Mask the rejected move before retrying; re-taking the same
            # argmax would loop forever on an illegal move.
            action_prob[next_action] = 0
            next_action = np.argmax(action_prob)

    end_time = time.time() * 1000

    # print('%.5fms  %.5fms  %.5f%%' % (end_time - start_time, net_run_time, net_run_time * 100 / (end_time - start_time)))
    if is_end:
        if winner is not None:
            return winner

    return 0.5  # unfinished or drawn game scores as a half-win
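Because an unfinished rollout returns a flat 0.5, a single random rollout is a noisy signal; the commented-out loop in code example #1 hints at averaging several rollouts per leaf. A minimal sketch of that idea, assuming Board.copy() exists (the commented code in example #1 also calls state.copy()):

# Sketch only: average several random rollouts from one position.
def averaged_rollout(state, n_rollouts=8):
    total = 0.0
    for _ in range(n_rollouts):
        total += simulate_random(state.copy())   # each rollout gets a copy
    return total / n_rollouts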
Code example #4
import os
import sys
import time
from typing import Union

# Board, the agent classes, BLACK_/WHITE_, move_int2xy and Log are
# project-internal names imported from the repository's own modules.


class Server:
    def __init__(self,
                 player_black: Union[AgentCLI, AgentGUI, DeepMCTSAgent,
                                     PureMCTSAgent],
                 player_white: Union[AgentCLI, AgentGUI, DeepMCTSAgent,
                                     PureMCTSAgent],
                 queue_draw=None):
        self.board = Board()

        self.black = player_black
        self.white = player_white

        self.player = {BLACK_: self.black, WHITE_: self.white}

        self.queue_draw = queue_draw
        self.playing = False
        self.gui = self.black.gui or self.white.gui

        self._time = {
            BLACK_: [],
            WHITE_: [],
        }

    @property
    def current_player(self):
        return self.player[self.board.current_player]

    def run(self, log_path=None, time_log_path=None):

        self.playing = True
        winner = None
        self.board.show()
        sys.stdout.flush()
        while self.playing:

            t1 = time.time()
            action = self.current_player.get_action(self.board)
            t2 = time.time()
            self._time[self.board.current_player].append(t2 - t1)

            if log_path is not None:
                with open(log_path, 'a') as f:
                    f.write(str(move_int2xy(action)) + '\n')

            self.board.play(action)

            os.system('cls')  # clear the console (Windows-only)
            self.board.show()
            Log.flush()
            sys.stdout.flush()
            if self.gui:
                self.queue_draw.put(
                    (*move_int2xy(action), self.board.last_player))

            is_end, winner = self.board.check_winner()
            if is_end:
                self.playing = False

        if winner is None:
            print(' Game Draw')
        elif winner == BLACK_:
            print(' Black win')
        elif winner == WHITE_:
            print(' White win')

        if log_path is not None:
            with open(log_path, 'a') as f:
                if winner is None:
                    f.write('Game Draw\n')
                elif winner == BLACK_:
                    f.write('Black win\n')
                elif winner == WHITE_:
                    f.write('White win\n')
        if time_log_path is not None:
            if isinstance(self.black, DeepMCTSAgent):
                black_type = 'Deep MCTS Agent with compute budget %d' % self.black.compute_budget
            elif isinstance(self.black, PureMCTSAgent):
                black_type = 'Pure MCTS Agent with compute budget %d' % self.black.compute_budget
            else:
                black_type = 'Human Player'

            if isinstance(self.white, DeepMCTSAgent):
                white_type = 'Deep MCTS Agent with compute budget %d' % self.white.compute_budget
            elif isinstance(self.white, PureMCTSAgent):
                white_type = 'Pure MCTS Agent with compute budget %d' % self.white.compute_budget
            else:
                white_type = 'Human Player'

            with open(time_log_path, 'a') as f:
                f.write('Black player: ' + black_type + '\n')
                # Black's first (opening) move is excluded from the averages.
                for black_time in self._time[BLACK_][1:]:
                    f.write('%.4f\n' % black_time)
                sum_black, len_black = sum(
                    self._time[BLACK_][1:]), len(self._time[BLACK_]) - 1
                f.write('average: %.4fs per action\n' %
                        (sum_black / len_black))
                if self.black.use_mcts:
                    f.write('         %.4fms per playout\n' %
                            (sum_black * 1000 / len_black /
                             self.black.compute_budget))
                f.write('\n')
                f.write('White player: ' + white_type + '\n')
                for white_time in self._time[WHITE_]:
                    f.write('%.4f\n' % white_time)
                sum_white, len_white = sum(self._time[WHITE_]), len(
                    self._time[WHITE_])
                f.write('average: %.4fs per action\n' %
                        (sum_white / len_white))
                if self.white.use_mcts:
                    f.write('         %.4fms per playout\n' %
                            (sum_white * 1000 / len_white /
                             self.white.compute_budget))
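For reference, wiring two agents into the server might look like the sketch below; the module paths and agent constructor arguments are assumptions, since only Server's own signature appears above.

# Hypothetical usage; module names and constructor arguments are assumed.
from server import Server
from agents import DeepMCTSAgent, AgentCLI

black = DeepMCTSAgent()     # real constructor arguments unknown
white = AgentCLI()          # human player on the command line
Server(black, white).run(log_path='game_log.txt',
                         time_log_path='time_log.txt')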