def select_action_q_and_u(self, env, is_root_node):
    """Choose the legal action that maximises Q + U for the position in ``env``.

    :param env: game environment; ``env.next_player`` selects the side to
        move and ``self.counter_key(env)`` supplies the statistics key.
    :param bool is_root_node: if true, Dirichlet noise is mixed into the
        prior before the prior is restricted to the legal moves.
    :return: index of the chosen action (argmax over the 64 scored squares).
    :rtype: int
    """
    key = self.counter_key(env)
    black_to_move = env.next_player == Player.black
    if black_to_move:
        legal_moves = find_correct_moves(key.black, key.white)
    else:
        legal_moves = find_correct_moves(key.white, key.black)

    # Square root of the summed visit counts, floored at 1 so that the
    # exploration term does not vanish while every count is still 0.
    # noinspection PyUnresolvedReferences
    visit_counts = self.var_n[key]
    sqrt_total = max(np.sqrt(np.sum(visit_counts)), 1)

    prior = self.var_p[key]
    if is_root_node:
        # Blend as (1 - eps) * p + eps * Dir(alpha) over all 64 squares.
        eps = self.play_config.noise_eps
        alpha = self.play_config.dirichlet_alpha
        prior = (1 - eps) * prior + eps * np.random.dirichlet([alpha] * 64)

    # Keep only the legal squares, then re-normalise if anything is left.
    legal_mask = bit_to_array(legal_moves, 64)
    prior = prior * legal_mask
    prior_total = np.sum(prior)
    if prior_total > 0:
        prior = prior / prior_total

    exploration = self.play_config.c_puct * prior * sqrt_total / (1 + visit_counts)

    q_value = self.var_q[key]
    if not black_to_move:
        # The Q-value is negated when the side to move is not black.
        q_value = -q_value

    # The +1000 offset keeps every legal square above the masked-out zeros.
    scores = (q_value + exploration + 1000) * legal_mask

    # noinspection PyTypeChecker
    return int(np.argmax(scores))
def step(self, action):
    """Play ``action`` for the side to move and advance the game state.

    :param int action: move pos=0 ~ 63 (0=top left, 7 top right, 63 bottom right)
    :return: tuple of ``self.board`` and an empty dict.
    """
    assert 0 <= action <= 63, f"Illegal action={action}"

    own, enemy = self.get_own_and_enemy()
    flipped = calc_flip(action, own, enemy)

    # A move that flips nothing is handed to illegal_move_to_lose and ends
    # the step immediately.
    if bit_count(flipped) == 0:
        self.illegal_move_to_lose(action)
        return self.board, {}

    # Flipped discs change sides and the new disc is added to the mover.
    own = (own ^ flipped) | (1 << action)
    enemy = enemy ^ flipped
    self.set_own_and_enemy(own, enemy)
    self.turn += 1

    enemy_can_move = bit_count(find_correct_moves(enemy, own)) > 0
    if enemy_can_move:
        self.change_to_next_player()
    elif bit_count(find_correct_moves(own, enemy)) == 0:
        # Neither side has a legal move left.
        self._game_over()
    # Otherwise only the mover has legal moves, so the same player goes again.

    return self.board, {}
def select_action_q_and_u(self, env, is_root_node):
    """Choose the legal action that maximises Q + U for the position in ``env``.

    :param env: game environment; ``env.next_player`` selects the side to
        move, ``env.turn`` drives the policy temperature and
        ``self.counter_key(env)`` supplies the statistics key.
    :param bool is_root_node: if true and ``noise_eps`` is positive, noise
        from ``dirichlet_noise_of_mask`` is mixed into the prior.
    :return: index of the chosen action (argmax over the 225 scored cells).
    :rtype: int
    """
    key = self.counter_key(env)
    black_to_move = env.next_player == Player.black
    if black_to_move:
        legal_moves = find_correct_moves(key.black, key.white)
    else:
        legal_moves = find_correct_moves(key.white, key.black)

    # Square root of the summed visit counts, floored at 1 so that the
    # exploration term does not vanish while every count is still 0.
    # noinspection PyUnresolvedReferences
    visit_counts = self.var_n[key]
    sqrt_total = max(np.sqrt(np.sum(visit_counts)), 1)

    # Restrict the prior to the legal cells.
    legal_mask = bit_to_array(legal_moves, 225)
    prior = self.var_p[key] * legal_mask
    if np.sum(prior) > 0:
        # The temperature is capped at 1 and shrinks as env.turn grows past
        # policy_decay_turn, so the policy is decayed late in the game.
        play_cfg = self.config.play
        turn_ratio = env.turn / play_cfg.policy_decay_turn
        temperature = min(np.exp(1 - np.power(turn_ratio, play_cfg.policy_decay_power)), 1)
        prior = self.normalize(prior, temperature)

    eps = self.play_config.noise_eps
    if is_root_node and eps > 0:
        # Blend as (1 - eps) * p + eps * noise.
        noise = dirichlet_noise_of_mask(legal_moves, self.play_config.dirichlet_alpha)
        prior = (1 - eps) * prior + eps * noise

    exploration = self.play_config.c_puct * prior * sqrt_total / (1 + visit_counts)

    q_value = self.var_q(key)
    if not black_to_move:
        # The Q-value is negated when the side to move is not black.
        q_value = -q_value

    # The +1000 offset keeps every legal cell above the masked-out zeros.
    scores = (q_value + exploration + 1000) * legal_mask

    # noinspection PyTypeChecker
    return int(np.argmax(scores))
def _should_game_over(self):
    """Tell whether the game has reached its end.

    :return: ``True`` when the two bitboards together hold at least 64 discs,
        or when neither side has a legal move; ``False`` otherwise.
    :rtype: bool
    """
    own, enemy = self.get_own_and_enemy()

    disc_total = bit_count(enemy) + bit_count(own)
    if disc_total >= 64:
        return True

    # The game continues as long as at least one side can still move.
    someone_can_move = (
        bit_count(find_correct_moves(enemy, own)) != 0
        or bit_count(find_correct_moves(own, enemy)) != 0
    )
    return not someone_can_move
def find_winning_move_and_score(self, env: ReversiEnv, exactly=True):
    """Search the game tree from ``env`` for the best move and its score.

    The score is the black-minus-white disc difference at the end of the
    game: black picks the maximum, the other side the minimum. Results are
    memoised in ``self.cache`` keyed by (black, white, next_player).

    :param env: environment to search from; it is mutated during the search
        (reset to the current position before each candidate move is played).
    :param bool exactly: when false, the search over the candidate moves
        stops at the first move whose score is winning for the side to move.
    :return: ``(best_action, best_score)``; ``best_action`` is ``None`` when
        ``env.done`` is already set.
    :raises Timeout: when more than ``self.timeout`` has elapsed since
        ``self.start_time``.
    """
    if env.done:
        n_black, n_white = env.board.number_of_black_and_white
        return None, n_black - n_white

    if time() - self.start_time > self.timeout:
        logger.debug("timeout!")
        raise Timeout()

    # Snapshot of the position, restored before every candidate move.
    turn = env.turn
    black, white, next_player = env.board.black, env.board.white, env.next_player
    key = (black, white, next_player)
    if key in self.cache:
        return self.cache[key]

    black_to_move = next_player == Player.black
    if black_to_move:
        legal_moves = find_correct_moves(black, white)
    else:
        legal_moves = find_correct_moves(white, black)

    action_list = [pos for pos in range(225) if legal_moves & (1 << pos)]
    score_list = np.zeros(len(action_list), dtype=int)

    for slot, action in enumerate(action_list):
        # Rewind env to the snapshot, then play the candidate move.
        env.board.black = black
        env.board.white = white
        env.next_player = next_player
        env.turn = turn
        env.done = False
        env.winner = None
        env.step(action)

        _, score = self.find_winning_move_and_score(env, exactly=exactly)
        score_list[slot] = score

        # A winning move is good enough when the exact best is not required.
        if not exactly and (
            (next_player == Player.black and score > 0)
            or (next_player == Player.white and score < 0)
        ):
            break

    if black_to_move:
        arg_best, best = np.argmax, np.max
    else:
        arg_best, best = np.argmin, np.min
    best_action = action_list[int(arg_best(score_list))]
    best_score = best(score_list)

    self.cache[key] = (best_action, best_score)
    return best_action, best_score
def bypass_first_move(self, key):
    """Pre-fill the search statistics stored under ``key``.

    The first maximal entry of the legal-move array (as found by
    ``np.argmax``) gets a visit count of 1 and a total value of 0, and the
    prior for ``key`` becomes the legal-move array divided by its sum.

    :param key: statistics key exposing ``black`` and ``white`` bitboards;
        black is treated as the side to move.
    """
    legal_bits = find_correct_moves(key.black, key.white)
    legal_array = bit_to_array(legal_bits, 64)
    first_choice = np.argmax(legal_array)

    self.var_n[key][first_choice] = 1
    self.var_w[key][first_choice] = 0
    # Presumably a 0/1 mask, which would make this a uniform prior over the
    # legal squares -- verify against bit_to_array.
    self.var_p[key] = legal_array / np.sum(legal_array)
def available(self, px, py):
    """Tell whether the human player may legally play on square (px, py).

    :param px: column of the square, expected in 0..7.
    :param py: row of the square, expected in 0..7.
    :return: ``True`` if the square is on the board and is a legal move for
        the human side, ``False`` otherwise.
    :rtype: bool
    """
    # Validate each coordinate on its own: checking only the combined index
    # would let an off-board pair such as (8, 0) or (-1, 1) alias onto a
    # real square of a neighbouring row.
    if not (0 <= px < 8 and 0 <= py < 8):
        return False

    pos = int(py * 8 + px)
    if pos < 0 or 64 <= pos:
        return False

    # "own" is the human's bitboard: black by default, swapped when the
    # human plays white.
    own, enemy = self.env.board.black, self.env.board.white
    if self.human_color == Player.white:
        own, enemy = enemy, own

    legal_moves = find_correct_moves(own, enemy)
    # Return a real bool so both exits of the function have the same type.
    return bool(legal_moves & (1 << pos))
def test_find_correct_move():
    """Check the Cython ``find_correct_moves`` against the Python one.

    For every example board both implementations must return the same
    result; each is then timed over 10000 calls and the timings are printed.
    """
    import spike.bitboard_cython as f
    import reversi_zero.lib.bitboard as b

    for ex in examples():
        black, white = parse_to_bitboards(ex)

        from_cython = f.find_correct_moves(black, white)
        from_python = b.find_correct_moves(black, white)
        assert from_cython == from_python

        # The timed statements look up f, b, black and white through the
        # namespace captured by locals().
        cy = timeit.timeit(
            "f.find_correct_moves(black, white)", globals=locals(), number=10000
        )
        py = timeit.timeit(
            "b.find_correct_moves(black, white)", globals=locals(), number=10000
        )
        print(f"Cython={cy} : cPython={py}")
def test_calc_flip():
    """Check the Cython ``calc_flip`` against the Python one.

    For every example board the legal moves of both implementations must
    agree; then, for each legal action, both ``calc_flip`` results must
    match. Each implementation is timed over 10000 calls and the timings
    are printed.
    """
    import spike.bitboard_cython as f
    import reversi_zero.lib.bitboard as b

    for ex in examples():
        black, white = parse_to_bitboards(ex)

        legal_moves = f.find_correct_moves(black, white)
        assert legal_moves == b.find_correct_moves(black, white)

        # Walk the set bits of the legal-move bitboard in ascending order.
        for action in (pos for pos in range(64) if legal_moves & (1 << pos)):
            flipped_cython = f.calc_flip(action, black, white)
            flipped_python = b.calc_flip(action, black, white)
            assert flipped_cython == flipped_python

            # The timed statements look up f, b, action, black and white
            # through the namespace captured by locals().
            cy = timeit.timeit(
                "f.calc_flip(action, black, white)", globals=locals(), number=10000
            )
            py = timeit.timeit(
                "b.calc_flip(action, black, white)", globals=locals(), number=10000
            )
            print(f"Cython={cy} : cPython={py}")
def _flip_test(ex, expect, player_black=True):
    """Assert that the legal moves rendered for board ``ex`` equal ``expect``.

    :param ex: board description accepted by ``parse_to_bitboards``.
    :param str expect: expected ``board_to_string`` output; leading and
        trailing whitespace is ignored in the comparison.
    :param bool player_black: compute the moves for black when true,
        otherwise for white.
    """
    black, white = parse_to_bitboards(ex)

    # Order the bitboards as (side to move, opponent).
    own, enemy = (black, white) if player_black else (white, black)
    moves = find_correct_moves(own, enemy)

    res = board_to_string(black, white, extra=moves)
    eq_(res.strip(), expect.strip(), f"\n{res}----{expect}")
def legal_moves(self):
    """Return the current player's legal moves as a 64-entry array.

    :return: result of ``bit_to_array`` applied to the legal-move bitboard
        of the side returned first by ``self.get_own_and_enemy()``.
    """
    own, enemy = self.get_own_and_enemy()
    legal_bits = find_correct_moves(own, enemy)
    # NOTE: no extra "pass" entry is appended, so a position without legal
    # moves is represented only by whatever bit_to_array yields for 0.
    return bit_to_array(legal_bits, 64)