コード例 #1
0
ファイル: player.py プロジェクト: zhusleep/reversi-alpha-zero
    def select_action_q_and_u(self, env, is_root_node):
        """Return the index (0-63) of the legal move with the highest Q + U.

        :param env: environment exposing ``next_player``; it is also passed to
            ``self.counter_key`` to obtain the statistics key.
        :param is_root_node: when truthy, Dirichlet noise is blended into the
            prior before it is masked to the legal moves.
        :return: ``int`` index of the arg-max of the masked score vector.
        """
        key = self.counter_key(env)
        black_to_move = env.next_player == Player.black
        if black_to_move:
            legal_moves = find_correct_moves(key.black, key.white)
        else:
            legal_moves = find_correct_moves(key.white, key.black)

        # sqrt(sum_b N(s, b)), floored at 1 so U does not vanish when nothing
        # has been visited yet.
        # noinspection PyUnresolvedReferences
        sqrt_visits = max(np.sqrt(np.sum(self.var_n[key])), 1)
        prior = self.var_p[key]

        if is_root_node:
            # (1 - eps) * p + eps * Dir(alpha).
            # NOTE(review): the noise is drawn over all 64 entries and only
            # masked to the legal moves afterwards.
            eps = self.play_config.noise_eps
            alpha = self.play_config.dirichlet_alpha
            prior = (1 - eps) * prior + eps * np.random.dirichlet([alpha] * 64)

        # Restrict the prior to legal moves and re-normalise when possible.
        legal_mask = bit_to_array(legal_moves, 64)
        prior = prior * legal_mask
        prior_total = np.sum(prior)
        if prior_total > 0:
            prior = prior / prior_total

        exploration = self.play_config.c_puct * prior * sqrt_visits / (1 + self.var_n[key])
        # Q is negated when black is not the side to move.
        q_values = self.var_q[key] if black_to_move else -self.var_q[key]
        # The +1000 offset lifts every legal entry above the zeros that the
        # mask leaves on illegal entries.
        scores = (q_values + exploration + 1000) * legal_mask

        # noinspection PyTypeChecker
        return int(np.argmax(scores))
コード例 #2
0
    def step(self, action):
        """
        Play ``action`` for the side to move and update turn/player state.

        If the move flips no discs, ``self.illegal_move_to_lose(action)`` is
        called and the board is returned without any further update.

        :param int action: move pos=0 ~ 63 (0=top left, 7 top right, 63 bottom right)
        :return: tuple ``(self.board, {})``
        """
        assert 0 <= action <= 63, f"Illegal action={action}"

        own, enemy = self.get_own_and_enemy()
        flipped = calc_flip(action, own, enemy)
        if bit_count(flipped) == 0:
            self.illegal_move_to_lose(action)
            return self.board, {}

        # Flipped discs change sides; the played square joins the mover.
        own = (own ^ flipped) | (1 << action)
        enemy = enemy ^ flipped
        self.set_own_and_enemy(own, enemy)
        self.turn += 1

        # The opponent has a legal reply: hand the turn over.
        if bit_count(find_correct_moves(enemy, own)) > 0:
            self.change_to_next_player()
            return self.board, {}

        # The opponent has no reply. If the mover has none either the game is
        # over; otherwise the mover simply keeps the turn.
        if not (bit_count(find_correct_moves(own, enemy)) > 0):
            self._game_over()

        return self.board, {}
コード例 #3
0
    def select_action_q_and_u(self, env, is_root_node):
        """Return the index of the legal move with the highest Q + U.

        This variant works on a 225-entry move mask, tempers the prior with a
        turn-dependent temperature, and adds Dirichlet noise at the root only
        when ``noise_eps`` is positive.

        :param env: environment exposing ``next_player`` and ``turn``; it is
            also passed to ``self.counter_key`` to obtain the statistics key.
        :param is_root_node: enables the root-noise blend when truthy.
        :return: ``int`` index of the arg-max of the masked score vector.
        """
        key = self.counter_key(env)
        black_to_move = env.next_player == Player.black
        if black_to_move:
            legal_moves = find_correct_moves(key.black, key.white)
        else:
            legal_moves = find_correct_moves(key.white, key.black)

        # sqrt(sum_b N(s, b)), floored at 1 so U does not vanish when nothing
        # has been visited yet.
        # noinspection PyUnresolvedReferences
        sqrt_visits = max(np.sqrt(np.sum(self.var_n[key])), 1)
        prior = self.var_p[key]

        # Restrict the prior to the legal moves.
        legal_mask = bit_to_array(legal_moves, 225)
        prior = prior * legal_mask
        if np.sum(prior) > 0:
            # The temperature is capped at 1 and falls as env.turn grows
            # relative to policy_decay_turn.
            play_cfg = self.config.play
            progress = np.power(env.turn / play_cfg.policy_decay_turn, play_cfg.policy_decay_power)
            temperature = min(np.exp(1 - progress), 1)
            # Normalise and decay the policy.
            prior = self.normalize(prior, temperature)

        if is_root_node and self.play_config.noise_eps > 0:
            # (1 - eps) * p + eps * noise, with the noise built from the
            # legal-move bitmask.
            noise = dirichlet_noise_of_mask(legal_moves, self.play_config.dirichlet_alpha)
            eps = self.play_config.noise_eps
            prior = (1 - eps) * prior + eps * noise

        exploration = self.play_config.c_puct * prior * sqrt_visits / (1 + self.var_n[key])
        # Q is negated when black is not the side to move.
        q_values = self.var_q(key) if black_to_move else -self.var_q(key)
        # The +1000 offset lifts every legal entry above the zeros that the
        # mask leaves on illegal entries.
        scores = (q_values + exploration + 1000) * legal_mask

        # noinspection PyTypeChecker
        return int(np.argmax(scores))
コード例 #4
0
    def _should_game_over(self):
        """Tell whether the game has reached a terminal position.

        The game is over when the two bitboards together hold at least 64
        discs, or when neither side has a legal move.
        """
        own, enemy = self.get_own_and_enemy()

        board_full = bit_count(enemy) + bit_count(own) >= 64
        if board_full:
            return True

        # The second lookup only runs when the opponent is already stuck.
        enemy_stuck = bit_count(find_correct_moves(enemy, own)) == 0
        return enemy_stuck and bit_count(find_correct_moves(own, enemy)) == 0
コード例 #5
0
    def find_winning_move_and_score(self, env: ReversiEnv, exactly=True):
        """Exhaustively search the position in ``env`` for the best move.

        The score is the black disc count minus the white disc count at the
        end of the game; black picks the maximum and white the minimum.
        Results are memoised in ``self.cache`` keyed by
        ``(black, white, next_player)``.

        ``env`` is mutated in place: before each candidate move its board,
        player, turn, ``done`` and ``winner`` fields are rewound to the values
        captured on entry, and it is not restored after the last candidate.

        :param env: environment to search from.
        :param exactly: when false, stop at the first move that already wins
            for the side to move instead of scoring every move.
        :return: ``(best_action, best_score)``; ``(None, score)`` when
            ``env.done`` is already set.
        :raises Timeout: when ``self.timeout`` seconds have passed since
            ``self.start_time``.
        """
        if env.done:
            black_count, white_count = env.board.number_of_black_and_white
            return None, black_count - white_count
        if time() - self.start_time > self.timeout:
            logger.debug("timeout!")
            raise Timeout()

        # Snapshot of the position so it can be rewound for each candidate.
        turn = env.turn
        black, white, next_player = env.board.black, env.board.white, env.next_player
        key = (black, white, next_player)
        if key in self.cache:
            return self.cache[key]

        black_to_move = next_player == Player.black
        if black_to_move:
            legal_moves = find_correct_moves(black, white)
        else:
            legal_moves = find_correct_moves(white, black)

        action_list = [square for square in range(225) if legal_moves & (1 << square)]
        score_list = np.zeros(len(action_list), dtype=int)
        for slot, action in enumerate(action_list):
            # Rewind env to the snapshot before trying this candidate.
            env.board.black = black
            env.board.white = white
            env.next_player = next_player
            env.turn = turn
            env.done = False
            env.winner = None

            env.step(action)
            score = self.find_winning_move_and_score(env, exactly=exactly)[1]
            score_list[slot] = score

            if exactly:
                continue
            # A winning move is enough; the best margin is not needed.
            if (black_to_move and score > 0) or (next_player == Player.white and score < 0):
                break

        if black_to_move:
            pick_index, pick_value = np.argmax, np.max
        else:
            pick_index, pick_value = np.argmin, np.min
        best_action = action_list[int(pick_index(score_list))]
        best_score = pick_value(score_list)

        self.cache[key] = (best_action, best_score)
        return best_action, best_score
コード例 #6
0
 def bypass_first_move(self, key):
     """Seed the search statistics for ``key`` without running a search.

     The first entry holding the maximum of the legal-move array gets one
     visit and zero accumulated value. The stored prior becomes the
     legal-move array divided by its sum.
     """
     legal_bits = find_correct_moves(key.black, key.white)
     legal_array = bit_to_array(legal_bits, 64)
     # np.argmax returns the first index holding the maximum value.
     first_choice = np.argmax(legal_array)
     self.var_n[key][first_choice] = 1
     self.var_w[key][first_choice] = 0
     self.var_p[key] = legal_array / np.sum(legal_array)
コード例 #7
0
 def available(self, px, py):
     """Report whether the human player may play on column ``px``, row ``py``.

     :param px: column of the square, expected in ``0..7``.
     :param py: row of the square, expected in ``0..7``.
     :return: ``False`` when the coordinates are off the board; otherwise the
         legal-move bitmask ANDed with the square's bit (non-zero, hence
         truthy, when the move is legal and ``0`` when it is not).
     """
     # Validate each coordinate on its own. Checking only the flattened index
     # would let an off-board column alias onto another row, e.g. (px=9, py=0)
     # would be treated as square 9 and (px=-1, py=1) as square 7.
     if not (0 <= px < 8 and 0 <= py < 8):
         return False
     pos = int(py * 8 + px)
     if pos < 0 or 64 <= pos:
         return False
     own, enemy = self.env.board.black, self.env.board.white
     if self.human_color == Player.white:
         own, enemy = enemy, own
     legal_moves = find_correct_moves(own, enemy)
     return legal_moves & (1 << pos)
コード例 #8
0
def test_find_correct_move():
    """Check two ``find_correct_moves`` implementations agree, then time them.

    For every board from ``examples()`` the result of
    ``spike.bitboard_cython.find_correct_moves`` must equal that of
    ``reversi_zero.lib.bitboard.find_correct_moves``. Each implementation is
    then timed over 10000 calls and the two timings are printed.
    """
    # The short aliases ``f`` and ``b`` are referenced by name inside the
    # timeit statement strings below, so they must keep these names.
    import spike.bitboard_cython as f
    import reversi_zero.lib.bitboard as b

    for ex in examples():
        black, white = parse_to_bitboards(ex)
        assert f.find_correct_moves(black, white) == b.find_correct_moves(
            black, white)
        # globals=locals() exposes f, b, black and white to the timed statement.
        cy = timeit.timeit("f.find_correct_moves(black, white)",
                           globals=locals(),
                           number=10000)
        py = timeit.timeit("b.find_correct_moves(black, white)",
                           globals=locals(),
                           number=10000)
        print(f"Cython={cy} : cPython={py}")
コード例 #9
0
def test_calc_flip():
    """Check two ``calc_flip`` implementations agree, then time them.

    For every board from ``examples()`` the two ``find_correct_moves``
    implementations are first checked for equality. Then, for each set bit
    (0-63) of the resulting move mask, ``calc_flip`` from
    ``spike.bitboard_cython`` must equal ``calc_flip`` from
    ``reversi_zero.lib.bitboard``. Each call is timed over 10000 runs and the
    two timings are printed.
    """
    # The short aliases ``f`` and ``b`` are referenced by name inside the
    # timeit statement strings below, so they must keep these names.
    import spike.bitboard_cython as f
    import reversi_zero.lib.bitboard as b

    for ex in examples():
        black, white = parse_to_bitboards(ex)
        assert f.find_correct_moves(black, white) == b.find_correct_moves(
            black, white)
        legal_moves = f.find_correct_moves(black, white)
        # Indices of the bits set in the move mask.
        action_list = [idx for idx in range(64) if legal_moves & (1 << idx)]

        for action in action_list:
            assert f.calc_flip(action, black,
                               white) == b.calc_flip(action, black, white)
            # globals=locals() exposes f, b, action, black and white to the
            # timed statement.
            cy = timeit.timeit("f.calc_flip(action, black, white)",
                               globals=locals(),
                               number=10000)
            py = timeit.timeit("b.calc_flip(action, black, white)",
                               globals=locals(),
                               number=10000)
            print(f"Cython={cy} : cPython={py}")
コード例 #10
0
def _flip_test(ex, expect, player_black=True):
    """Assert that the rendered move mask for board ``ex`` equals ``expect``.

    :param ex: board description accepted by ``parse_to_bitboards``.
    :param expect: expected ``board_to_string`` rendering; surrounding
        whitespace is ignored in the comparison.
    :param player_black: compute the moves for black when true, otherwise
        for white.
    """
    black, white = parse_to_bitboards(ex)
    if player_black:
        moves = find_correct_moves(black, white)
    else:
        moves = find_correct_moves(white, black)
    res = board_to_string(black, white, extra=moves)
    eq_(res.strip(), expect.strip(), f"\n{res}----{expect}")
コード例 #11
0
 def legal_moves(self):
     """Return the side-to-move's move bitmask expanded to a 64-entry array.

     No extra "pass" entry is appended when the mask is empty.
     """
     own, enemy = self.get_own_and_enemy()
     return bit_to_array(find_correct_moves(own, enemy), 64)