def expand_pocliy_random(board: Board):
    """Expand candidate moves with a uniform prior over the narrow neighbourhood.

    NOTE(review): the name misspells "policy" ("pocliy"); kept as-is so existing
    callers do not break.

    Builds a mask of cells adjacent (narrow convolution) to existing stones and
    assigns each maskable available move an equal probability.

    :param board: current game position; must expose ``get_board_array()`` and
        ``available`` (iterable of flat move indices).
    :return: list of ``(move, probability)`` pairs; empty when no cell is
        adjacent to an existing stone.
    """
    board_array = board.get_board_array()
    conv_available = _convolve_board_available_narrow(board_array)
    cnt = conv_available.sum()
    if cnt == 0:
        # No candidate cells near existing stones (e.g. empty board) —
        # avoid a division by zero and report no expandable moves.
        return []
    probability = conv_available / cnt
    return [(move, probability[move]) for move in board.available if conv_available[move]]
def simulate_network(board: Board, limit=100, q_confidence=0.5):
    """Play out the position with the rollout policy network, then score it.

    Runs up to *limit* plies. Each ply: stop if the game is decided; otherwise
    play any forced ("must") move; otherwise query the rollout policy network.
    The first ``random_bound`` network plies are sampled stochastically from the
    policy distribution; later plies pick the highest-probability legal move.

    :param board: position to simulate from (mutated in place by ``play``).
    :param limit: maximum number of plies to simulate.
    :param q_confidence: shrinkage factor blending the value-network output
        toward 0.5 (0 → always 0.5, 1 → raw network value).
    :return: the winner if the simulation reached a decided game with a winner;
        otherwise a scalar value estimate in [0, 1] from the black side's view
        (presumably; see the player-flip below — TODO confirm against ValueRunner).
    :raises ValueError: if a forced move is rejected by ``board.play``.
    """
    # Number of initial network plies that sample instead of playing greedily.
    random_bound = 4
    # Accumulated wall-clock time (ms) spent inside the policy network.
    net_run_time = 0.
    start_time = time.time() * 1000
    is_end, winner = False, None
    for i in range(limit):
        is_end, winner = board.check_winner()
        if is_end:
            # print(i, 'end')
            break
        # Forced moves (e.g. win-in-one / block-loss) bypass the network entirely.
        must = board.check_must()
        if must is not None:
            if not board.play(must):
                raise ValueError('Must Error')
            continue
        t1 = time.time() * 1000
        action_prob = rollout_policy_network(board)
        # action_prob = policy(board)
        t2 = time.time() * 1000
        net_run_time += t2 - t1
        if i < random_bound:
            # Sample a move from the policy distribution; re-sample until legal
            # (the network can put mass on occupied cells).
            next_action = np.random.choice(15 * 15, 1, p=action_prob)[0]
            while not board.play(next_action):
                next_action = np.random.choice(15 * 15, 1, p=action_prob)[0]
        else:
            # Greedy phase: best-scoring move among the legal ones only.
            next_action = max(board.available, key=lambda move: action_prob[move])
            board.play(next_action)
    end_time = time.time() * 1000
    # print('%.5fms %.5fms %.5f%%' % (end_time - start_time, net_run_time, net_run_time * 100 / (end_time - start_time)))
    if is_end:
        if winner is not None:
            return winner
        # is_end with no winner (draw / full board) falls through to the value net.
    # Undecided (or drawn) position: ask the value network for an estimate.
    value_network_Q = ValueRunner(board.get_board_array())
    if board.current_player == BLACK_:  # array shape (white, black)
        value_network_Q = 1 - value_network_Q  # change to black side
    # Shrink toward 0.5 so low confidence in the value net damps extreme scores.
    return q_confidence * (value_network_Q - 0.5) + 0.5
def expand_policy_network(board: Board) -> List[Tuple[int, float]]:
    """Return candidate ``(move, prior)`` pairs from the tree policy network.

    Special cases, in priority order: the opening-book reply set after a
    center first move; forced moves for the side to play; forced moves for
    either side. Otherwise every available cell inside the wide neighbourhood
    of existing stones is returned with its network prior.
    """
    board_array = board.get_board_array()
    probability = TreePolicyRunner(board_array)

    def with_priors(moves):
        # Attach the network prior to each candidate move.
        return [(m, probability[m]) for m in moves]

    # Opening book: fixed reply set after a first move on the center point.
    if len(board.moved) == 1 and board.moved[0] == move_xy2int(7, 7):
        return with_priors(__second_move_available)

    # Forced moves of the side to play take absolute priority.
    own_must = board.must[board.current_player]
    if len(own_must):
        return with_priors(own_must)

    # Otherwise any forced point on the board, for either side.
    all_must = board.must[0] | board.must[1]
    if len(all_must):
        return with_priors(all_must)

    # General case: legal cells inside the wide neighbourhood mask.
    conv_available = _convolve_board_available_wide(board_array)
    return [(m, probability[m]) for m in board.available if conv_available[m]]
def rollout_policy_network(board: Board):
    """Return the rollout policy network's move distribution for *board*."""
    return RolloutPolicyRunner(board.get_board_array())
def rollout_policy_random(board: Board):
    """Random rollout scores masked to the narrow neighbourhood of stones.

    Draws uniform noise for every cell of the 15x15 board and zeroes out
    cells outside the narrow convolution mask; the result is unnormalized.
    """
    mask = _convolve_board_available_narrow(board.get_board_array())
    noise = np.random.rand(15 * 15)
    return noise * mask