Example #1
 def _calc_policy(self, own, enemy):
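     """Build the move policy for the given bitboards: sample from the visit
     probabilities in the early game, otherwise pick the most-visited move."""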
     env = OthelloEnv().update(own, enemy, Stone.black)
     node = create_node(env)
     # early game (turn < change_tau_turn, e.g. 4): keep exploring
     if env.epoch < self.play_config.change_tau_turn:
         return self.__calc_policy_by_prob(node)  # policy proportional to visit counts
     else:
         return self.__calc_policy_by_max(node)
Example #2
    def __init__(self, config: Config):
        self.config = config
        self.env = OthelloEnv().reset()
        self.ai = OthelloPlayer(self.config,
                                _load_model(self.config),
                                weight_table=WEIGHT_TABLE / 3,
                                c=20,
                                mc=True)  # type: OthelloPlayer

        self.human_stone = None

        self.rev_function = None
        self.count_one_step = 0
        self.count_all_step = 0
        self.last_evaluation = None
        self.last_history = None  # type: LastAcNQ
        self.last_ava = None
        self.action = None
Example #3
 async def __start_search_my_move(self, own, enemy):
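     """Run one root-level MCTS simulation for this position, bounded by the semaphore."""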
     # bookkeeping: one more simulation in flight
     self.running_simulation_num += 1
     # wait for a semaphore slot (limits concurrency to 8 coroutines)
     with await self.sem:
         env = OthelloEnv().update(own, enemy, Stone.black)
         leaf_v = await self.___recursive_simulation(env, is_root_node=True)
         self.running_simulation_num -= 1
         return leaf_v
Example #4
    def solve(self, black, white, next_to_play, exactly=False):
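        """Endgame solver: return (best_move, score) from the perspective of
        `next_to_play`, or (None, None) if the search times out."""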
        # record the start time; when switching to an exact search, drop the cache left by the previous inexact run
        self.start_time = time()
        if not self.last_is_exactly and exactly:
            self.cache = {}
        self.last_is_exactly = exactly

        # run the search; the internal score is from black's perspective
        try:
            move, score = self._find_winning_move_and_score(
                OthelloEnv().update(black, white, next_to_play),
                exactly=exactly)
            return move, (score if next_to_play == Stone.black else -score)
        except Timeout:
            return None, None
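
A minimal usage sketch for the solver above (the `Solver` class name and its construction are assumptions; `black` and `white` are 64-bit bitboards as in the other examples, and the returned score is for the side to play):

    # hypothetical wiring: the example above only defines solve() itself
    solver = Solver()
    env = OthelloEnv().reset()
    move, score = solver.solve(env.chessboard.black,
                               env.chessboard.white,
                               env.next_to_play,
                               exactly=False)
    if move is None:
        print("solver timed out")
    else:
        print(f"move {move} scores {score} for the side to play")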
Example #5
    def think_and_play(self, own, enemy):
        """play tmd:方案:50步以前使用深度學習mctree,若tree到達50步深度后再用minmaxtree; 50步以後直接用minmaxtree
        若搜不到/超時再用之前構建的樹"""
        # renew env
        self.start_time = time.time()
        env = OthelloEnv().update(own, enemy, next_to_play=Stone.black)
        node = create_node(env)

        # after turn 50: run the minimax tree search directly; if it finds nothing, fall back to deep learning
        if env.epoch >= self.play_config.use_solver_turn:
            logger.warning(f"Entering minmax_tree process")
            ret = self._solver(node)
            if ret:  # not save move as play data
                return ret
        else:  # before turn 50: use the deep-learning MCTS
            for t1 in range(self.play_config.thinking_loop):  # search moves up to thinking_loop times
                logger.warning(f"Entering {t1} thinking_loop")
                self._expand_tree(env, node)
                policy, action, value_diff = self._calc_policy_and_action(node)
                # stop early if the turn is still small, or if the chosen action's value is close enough and its visit count is large enough
                if env.epoch <= self.play_config.start_rethinking_turn or \
                        (value_diff > -0.01 and self.num_tree[node][action] >= self.play_config.required_visit_to_decide_action):
                    break

            # record or return
            if self.mode == 'gui':
                self._update_thinking_history(own, enemy, action, policy)
                self._update_avalable(own, enemy, action, policy)
            elif self.mode == 'self_play':
                if self.allow_resign:  # consider resigning when the win rate is too low
                    if self.play_config.resign_threshold is not None and\
                        np.max(self.win_rate(node)-(self.num_tree[node]==0)*10) <= self.play_config.resign_threshold:
                        if env.epoch >= self.config.play.allowed_resign_turn:
                            return AcNQ(None, 0, 0)  # means resign
                        else:
                            logger.debug(
                                f"Want to resign but disallowed turn {env.epoch} < {self.config.play.allowed_resign_turn}"
                            )
                # save play data
                saved_policy = self.__calc_policy_by_prob(node) \
                    if self.config.play_data.save_policy_of_tau_1 else policy
                self.__save_data_to_moves(own, enemy, saved_policy)
            return AcNQ(action=action,
                        n=self.num_tree[node][action],
                        q=self.win_rate(node)[action])
Example #6
    def _find_winning_move_and_score(self, env: OthelloEnv, exactly=True):
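        """Exhaustive minimax search: return the best move and its score, where the
        score is black stones minus white stones."""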
        # terminal position: the score is black count minus white count
        if env.done:
            b, w = env.chessboard.black_white
            return None, b - w

        # cache lookup
        key = black, white, next_to_play = env.chessboard.black, env.chessboard.white, env.next_to_play
        if key in self.cache:  # position already solved
            return self.cache[key]

        # timeout
        if time() - self.start_time > self.timeout:
            logger.debug("timeout!")
            raise Timeout()

        # recurse over every legal move of the side to play
        legal_moves = find_correct_moves(
            *((black, white) if next_to_play == Stone.black else (white, black)))
        action_list = [idx for idx in range(64)
                       if legal_moves & (1 << idx)]  # enumerate all legal moves
        score_list = np.zeros(len(action_list), dtype=int)
        record_turn = env.epoch
        for i, action in enumerate(action_list):
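            # restore the position in place before trying this move (env.do mutates env)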
            env.chessboard.black = black
            env.chessboard.white = white
            env.next_to_play = next_to_play
            env.epoch = record_turn
            env.done = False
            env.result = None
            env.do(action)
            _, score = self._find_winning_move_and_score(env, exactly=exactly)
            score_list[i] = score

            if not exactly:
                if next_to_play == Stone.black and score > 0:  # one winning move is enough in non-exact mode
                    break
                elif next_to_play == Stone.white and score < 0:
                    break

        best_action, best_score = (
            action_list[int(np.argmax(score_list))],
            np.max(score_list)) if next_to_play == Stone.black else (
                action_list[int(np.argmin(score_list))], np.min(score_list))
        self.cache[key] = (best_action, best_score)
        return best_action, best_score
Example #7
class EnvGui:
    def __init__(self, config: Config):
        self.config = config
        self.env = OthelloEnv().reset()
        self.ai = OthelloPlayer(self.config,
                                _load_model(self.config),
                                weight_table=WEIGHT_TABLE / 3,
                                c=20,
                                mc=True)  # type: OthelloPlayer

        self.human_stone = None

        self.rev_function = None
        self.count_one_step = 0
        self.count_all_step = 0
        self.last_evaluation = None
        self.last_history = None  # type: LastAcNQ
        self.last_ava = None
        self.action = None

    def start_game(self, human_is_black):
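        """Reset all state and assign the human player's stone color."""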
        # set color and env
        self.__init__(self.config)
        self.human_stone = Stone.black if human_is_black else Stone.white

    def play_next_turn(self):
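        """Dispatch one turn: fire the update callback, then the game-over callback if the
        game is done, otherwise the AI-move callback when it is the AI's turn."""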
        # update + ai_move / over
        self._do_move(1)

        # do over
        if self.env.done:
            self._do_move(3)
            return

        # do ai_move
        if self.env.next_to_play != self.human_stone:
            self._do_move(2)

    def _do_move(self, event):
        self.rev_function[event]()

    def add_observer(self, ob_map):
        self.rev_function = ob_map

    def stone(self, px, py):
        """left top=(0, 0), right bottom=(7,7)"""
        action = int(py * 8 + px)
        if self.env.chessboard.black & (1 << action):
            return 2
        elif self.env.chessboard.white & (1 << action):
            return 1
        else:
            return 0

    def available(self, px, py):
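        """Return True if the empty square at (px, py) is a legal move for the side to play."""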
        own, enemy = (self.env.chessboard.black, self.env.chessboard.white
                      ) if self.env.next_to_play == Stone.black else (
                          self.env.chessboard.white, self.env.chessboard.black)
        action = int(py * 8 + px)
        if action < 0 or 64 <= action \
                or (1 << action) & self.env.chessboard.black \
                or (1 << action) & self.env.chessboard.white \
                or not (1 << action) & find_correct_moves(own, enemy):
            return False
        return True

    def move(self, px, py):
        self.env.do(int(py * 8 + px))

    def move_by_ai(self):
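        """Ask the AI for its move, time the decision, play it on the board, and record
        the AI's evaluation for display."""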
        own, enemy = (self.env.chessboard.black, self.env.chessboard.white
                      ) if self.env.next_to_play == Stone.black else (
                          self.env.chessboard.white, self.env.chessboard.black)
        start = time.time()
        self.action = self.ai.think_and_play(own, enemy).action
        end = time.time()
        self.count_one_step = end - start
        self.count_all_step += self.count_one_step
        self.env.do(self.action)

        # record the AI's thinking for display
        self.last_history = self.ai.thinking_history
        self.last_evaluation = self.last_history.values[
            self.last_history.action]
        self.last_ava = self.ai.avalable
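
A minimal wiring sketch for `EnvGui` (the `refresh_board` and `show_game_over` callbacks are placeholders; the class only defines the event dispatch, not a concrete GUI):

    # hypothetical observer map: 1 = refresh, 2 = AI move, 3 = game over
    gui = EnvGui(Config())
    gui.add_observer({1: refresh_board,
                      2: gui.move_by_ai,
                      3: show_game_over})
    gui.start_game(human_is_black=True)
    gui.play_next_turn()  # refreshes the board, then lets the AI move if it is to play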
Example #8
 def __get_next_key(self, own, enemy, action):
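     """Apply `action` to the position given by the bitboards and return the resulting node key."""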
     env = OthelloEnv().update(own, enemy, Stone.black)
     env.do(action)
     return create_node(env)
Example #9
    async def ___recursive_simulation(self,
                                      env: OthelloEnv,
                                      is_root_node=False):
        "fertilize tree process"
        # get both keys
        node, another_side_node = create_both_nodes(env)
        if self.test_mode:
            if node not in map:  # debug bookkeeping: the epoch at which this node was first seen
                map[node] = env.epoch

        # return condition 1: the game is over; return the outcome from black's perspective
        if env.done:
            if env.result == Result.black:
                return 1
            elif env.result == Result.white:
                return -1
            else:
                return 0

        # return condition 2: call the endgame solver (beyond turn 50, minimax)
        if env.epoch >= self.config.play.use_solver_turn_in_simulation:
            action, point = self.solver.solve(node.black,
                                              node.white,
                                              Stone(node.next_to_play),
                                              exactly=False)
            if action:
                point = point if env.next_to_play == Stone.black else -point
                leaf_v = np.sign(point)
                leaf_p = np.zeros(64)
                leaf_p[action] = 1
                # update tree
                update_num_tree_with_one_or_moresides(self.num_tree, node,
                                                      action, ["plus", "plus"],
                                                      [1, 1])  # visit count +1 for this move
                update_win_tree_with_one_or_moresides(
                    self.win_tree, node, action, ["plus", "minus"],
                    [leaf_v, leaf_v])  # win value +/- leaf_v for this move
                update_policy_tree_with_one_or_moresides(
                    self.policy_tree, node, ["set", "set"],
                    [leaf_p, leaf_p])  # the position this node should play (policy)
                return np.sign(point)
            if time.time() - self.start_time >= 55:  # out of time (55 s budget): give up on this simulation
                return 0
        # return condition 3: expand the tree (at or below turn 50: use the deep-learning net)
        while node in self.now_expanding:  # two search coroutines expanding the same node would conflict
            await asyncio.sleep(self.config.play.wait_for_expanding_sleep_sec)
        # is leaf
        if node not in self.expanded:  # reach leaf node
            leaf_v = await self.____expand_leaf_node(env)
            if env.next_to_play == Stone.black:
                return leaf_v  # Value for black
            else:
                return -leaf_v  # Value for white == -Value for black
        else:  # already expanded: descend with virtual loss
            virtual_loss_for_w = self.config.play.virtual_loss if env.next_to_play == Stone.black else -self.config.play.virtual_loss
            action_t = self.____decide_action(env, is_root_node)  # pick a child with the UCB formula
            # apply virtual loss so that parallel simulations spread over different paths
            update_num_tree_with_one_or_moresides(
                self.num_tree, node, action_t, ["plus"],
                [self.config.play.virtual_loss])
            update_win_tree_with_one_or_moresides(self.win_tree, node,
                                                  action_t, ["minus"],
                                                  [virtual_loss_for_w])
            env.do(action_t)
            leaf_v = await self.___recursive_simulation(env)  # next move
            # back up the result: remove the virtual loss and record the real visit and value
            update_num_tree_with_one_or_moresides(
                self.num_tree, node, action_t, ["plus", "plus"],
                [-self.config.play.virtual_loss + 1, 1])
            update_win_tree_with_one_or_moresides(
                self.win_tree, node, action_t, ["plus", "minus"],
                [virtual_loss_for_w + leaf_v, leaf_v])
            if self.test_mode:
                logger.warning(f"{map[node]} {leaf_v}")
        return leaf_v