예제 #1
0
    def do(self, ac):
        """
        Play one move for the side whose turn it is and return the board.

        :param int|None ac: move pos=0 ~ 63 (0=top left, 7 top right, 63 bottom right), None is resign
        :return: ``self.chessboard`` once the move (or the resignation) has been handled
        """
        assert ac is None or 0 <= ac <= 63, f"Illegal ac={ac} {self.epoch}"

        # Resignation: the other side is declared the winner right away.
        if ac is None:
            logger.warning(f"SITUATION: resigned {self.epoch}")
            self._other_win()
            return self.chessboard

        # "own" belongs to the side to play, "opp" to the other side.
        if self.next_to_play == Stone.black:
            own, opp = self.chessboard.black, self.chessboard.white
        else:
            own, opp = self.chessboard.white, self.chessboard.black

        # Stones of the other side that this move would turn over.
        flipped = calc_flip(ac, own, opp)

        # A move that flips nothing is illegal; the other side wins.
        if bit_count(flipped) == 0:
            logger.warning(
                f"SITUATION: Illegal ac={ac}, No Flipped, Set {switch_sides(self.next_to_play)} win {self.epoch}"
            )
            self._other_win()
            return self.chessboard

        # Apply the move and write both colours back to the board.
        own, opp = self._do_flip(own, opp, ac, flipped)
        if self.next_to_play == Stone.black:
            self.chessboard.black, self.chessboard.white = own, opp
        else:
            self.chessboard.black, self.chessboard.white = opp, own

        if bit_count(find_correct_moves(opp, own)) > 0:
            # The other side has a reply, so the turn passes to it.
            self.next_to_play = switch_sides(self.next_to_play)
        elif not bit_count(find_correct_moves(own, opp)) > 0:
            # Neither side has a legal move left.
            self._game_over()
        # Remaining case: the other side has no move, the same side plays again.

        return self.chessboard
예제 #2
0
 def available(self, px, py):
     """
     Tell whether the side to play may put a stone on column px, row py.

     :param px: column of the square, valid range 0 ~ 7
     :param py: row of the square, valid range 0 ~ 7
     :return bool: True when the square is on the board, empty, and one of
         the moves reported by find_correct_moves for the side to play
     """
     # Validate each axis on its own: checking only py * 8 + px would let an
     # off-board coordinate such as px=8, py=0 alias onto square 8.
     if not (0 <= px < 8 and 0 <= py < 8):
         return False

     action = int(py * 8 + px)
     # Defensive: non-integer coordinates could still round out of range.
     if not 0 <= action < 64:
         return False

     square = 1 << action
     board = self.env.chessboard
     if square & board.black or square & board.white:
         # Already occupied by one of the two colours.
         return False

     if self.env.next_to_play == Stone.black:
         own, enemy = board.black, board.white
     else:
         own, enemy = board.white, board.black
     return bool(square & find_correct_moves(own, enemy))
예제 #3
0
    def ____decide_action(self, env, is_root_node):
        """
        Pick the move to follow from the position held by env.

        Each entry is scored as
        ``(+/-)win_rate * c + p * sqrt(sum N) / (1 + N) + 1000 + weight_table``
        (the win-rate term is negated when black is not to play), the scores
        are multiplied by the legal-move mask, and the argmax is returned.

        :param env: environment providing the position, side to play and epoch
        :param is_root_node: when truthy and noise_eps > 0, Dirichlet noise is
            blended into the prior
        :return int: index of the best-scoring entry
        """
        node = create_node(env)
        if env.next_to_play == Stone.black:
            legal_moves = find_correct_moves(node.black, node.white)
        else:
            legal_moves = find_correct_moves(node.white, node.black)

        # sqrt(sum over b of N(s, b)), never less than 1
        visit_scale = max(np.sqrt(np.sum(self.num_tree[node])), 1)

        # Prior restricted to the legal moves, then re-normalised.
        prior = self.policy_tree[node]
        prior = prior * bit_to_array(legal_moves, 64)
        if np.sum(prior) > 0:
            decay = np.power(env.epoch / self.config.play.policy_decay_turn,
                             self.config.play.policy_decay_power)
            temperature = min(np.exp(1 - decay), 1)
            prior = normalize(prior, temperature)

        # Root only: (1 - eps) * p + eps * Dir(alpha)
        if is_root_node and self.play_config.noise_eps > 0:
            noise = dirichlet_noise_of_mask(legal_moves,
                                            self.play_config.dirichlet_alpha)
            eps = self.play_config.noise_eps
            prior = (1 - eps) * prior + eps * noise

        # Exploration term: p * sqrt(sum N) / (1 + N)
        exploration = prior * visit_scale / (1 + self.num_tree[node])

        # Win rate counts positively for black and negatively otherwise.
        if env.next_to_play == Stone.black:
            exploitation = self.win_rate(node) * self.c
        else:
            exploitation = -self.win_rate(node) * self.c

        # NOTE(review): the +1000 offset presumably keeps every legal score
        # above the 0 that masked-out entries receive - confirm.
        scores = exploitation + exploration + 1000 + self.weight_table
        masked_scores = scores * bit_to_array(legal_moves, 64)
        return int(np.argmax(masked_scores))
예제 #4
0
 def _set_first_move(self, node):
     """
     Seed the visit, win and policy trees for node.

     The visit count of the selected move is set to 1, its win value to 0,
     and the policy becomes the legal-move array divided by its sum (1/n on
     each of n legal squares if the array is a 0/1 mask - TODO confirm).

     :param node: position whose ``black`` / ``white`` fields are read
     """
     # Moves for black against white, expanded to a 64-entry array.
     legal_mask = bit_to_array(
         find_correct_moves(node.black, node.white), 64)

     # np.argmax returns the first maximal entry, so the pick is
     # deterministic rather than random.
     first_move = np.argmax(legal_mask)

     update_num_tree_with_one_or_moresides(
         self.num_tree, node, first_move, ["set"], [1])
     update_win_tree_with_one_or_moresides(
         self.win_tree, node, first_move, ["set"], [0])

     uniform_prior = legal_mask / np.sum(legal_mask)
     update_policy_tree_with_one_or_moresides(
         self.policy_tree, node, ["set"], [uniform_prior])
예제 #5
0
    def _find_winning_move_and_score(self, env: OthelloEnv, exactly=True):
        """
        Search the game tree below env for the best move of the side to play.

        Black picks the maximum score and white the minimum. The score of a
        finished game is the first minus the second value of
        ``env.chessboard.black_white``. Results are memoised in ``self.cache``
        under (black, white, next_to_play).

        env is modified in place: it is rewound before every candidate move
        and is left in whatever state the last explored line produced.

        :param env: environment to search from
        :param exactly: when falsy, stop at the first move that already wins
            for the side to play instead of scoring every move
        :return: (best_action, best_score); best_action is None when env.done
        :raises Timeout: once more than self.timeout has elapsed since
            self.start_time
        """
        # Finished game: nothing left to play, just report the difference.
        if env.done:
            black_total, white_total = env.chessboard.black_white
            return None, black_total - white_total

        black = env.chessboard.black
        white = env.chessboard.white
        next_to_play = env.next_to_play
        key = (black, white, next_to_play)

        # Position already solved earlier.
        if key in self.cache:
            return self.cache[key]

        if time() - self.start_time > self.timeout:
            logger.debug("timeout!")
            raise Timeout()

        black_to_move = next_to_play == Stone.black
        if black_to_move:
            legal_moves = find_correct_moves(black, white)
        else:
            legal_moves = find_correct_moves(white, black)

        # Every set bit of legal_moves is a candidate move.
        candidates = [sq for sq in range(64) if legal_moves & (1 << sq)]
        scores = np.zeros(len(candidates), dtype=int)
        epoch_at_entry = env.epoch

        for slot, move in enumerate(candidates):
            # Rewind env to this node before trying the next candidate.
            env.chessboard.black = black
            env.chessboard.white = white
            env.next_to_play = next_to_play
            env.epoch = epoch_at_entry
            env.done = False
            env.Result = None

            env.do(move)
            _, score = self._find_winning_move_and_score(env, exactly=exactly)
            scores[slot] = score

            if exactly:
                continue
            # One winning line is enough when exact scores are not needed.
            black_wins = black_to_move and score > 0
            if black_wins or (next_to_play == Stone.white and score < 0):
                break

        if black_to_move:
            best_action = candidates[int(np.argmax(scores))]
            best_score = np.max(scores)
        else:
            best_action = candidates[int(np.argmin(scores))]
            best_score = np.min(scores)

        self.cache[key] = (best_action, best_score)
        return best_action, best_score
예제 #6
0
 def _update_avalable(self, black, white, action, policy):
     """
     Refresh ``self.avalable`` with the move sets before and after action.

     The first set is find_correct_moves(black, white) for the given
     position; the second is find_correct_moves(white, black) for the key
     returned by ``__get_next_key(black, white, action)``.

     :param black: black stones of the current position
     :param white: white stones of the current position
     :param action: move handed to ``__get_next_key``
     :param policy: not used by this method
     """
     node = TreeNode(black, white, Stone.black.value)
     next_key = self.__get_next_key(black, white, action)

     moves_now = find_correct_moves(node.black, node.white)
     moves_after = find_correct_moves(next_key.white, next_key.black)
     self.avalable = LastAva(moves_now, moves_after)