Example 1
def expand_policy_random(board: Board):
    board_array = board.get_board_array()
    # Empty cells within the narrow neighborhood of existing stones.
    conv_available = _convolve_board_available_narrow(board_array)
    cnt = conv_available.sum()
    probability = conv_available / cnt  # uniform prior over the masked cells

    return_list = []
    for move in board.available:
        if conv_available[move]:
            return_list.append((move, probability[move]))

    return return_list
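
The pairs returned above form a uniform prior over the masked cells. A minimal, self-contained sketch of that property, using a hypothetical mask in place of the real _convolve_board_available_narrow output:

import numpy as np

# Hypothetical mask; the real one marks empty cells near existing stones.
conv_available = np.zeros(15 * 15)
conv_available[[110, 111, 112, 126, 128]] = 1.0

probability = conv_available / conv_available.sum()
pairs = [(move, probability[move]) for move in range(15 * 15) if conv_available[move]]

assert abs(sum(p for _, p in pairs) - 1.0) < 1e-9  # the pairs form a uniform prior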
Example 2
def simulate_network(board: Board, limit=100, q_confidence=0.5):
    random_bound = 4
    net_run_time = 0.
    start_time = time.time() * 1000

    is_end, winner = False, None

    for i in range(limit):
        is_end, winner = board.check_winner()
        if is_end:
            # print(i, 'end')
            break

        # A forced move, if any, is played immediately.
        must = board.check_must()
        if must is not None:
            if not board.play(must):
                raise ValueError('Must Error')
            continue

        t1 = time.time() * 1000
        action_prob = rollout_policy_network(board)
        # action_prob = policy(board)
        t2 = time.time() * 1000
        net_run_time += t2 - t1

        if i < random_bound:
            # Opening moves: sample from the rollout policy distribution.
            next_action = np.random.choice(15 * 15, 1, p=action_prob)[0]

            # Resample until the chosen cell is actually playable.
            while not board.play(next_action):
                next_action = np.random.choice(15 * 15, 1, p=action_prob)[0]

        else:
            # Later moves: greedily play the highest-probability available move.
            next_action = max(board.available,
                              key=lambda move: action_prob[move])
            board.play(next_action)

    end_time = time.time() * 1000

    # print('%.5fms  %.5fms  %.5f%%' % (end_time - start_time, net_run_time, net_run_time * 100 / (end_time - start_time)))
    if is_end:
        if winner is not None:
            return winner
    value_network_Q = ValueRunner(board.get_board_array())
    if board.current_player == BLACK_:  # array shape (white, black)
        value_network_Q = 1 - value_network_Q  # change to black side
    return q_confidence * (value_network_Q - 0.5) + 0.5
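
When the step limit is reached without a winner, the return value blends the value-network estimate toward the neutral score 0.5, with q_confidence controlling how much the value head is trusted. A minimal sketch of that mapping, using made-up Q values rather than real ValueRunner output:

def blend_q(value_network_q, q_confidence=0.5):
    # Shrink the value-network estimate toward the neutral score 0.5.
    return q_confidence * (value_network_q - 0.5) + 0.5

# Illustrative values only:
print(blend_q(1.0))  # 0.75 -- a confident "black wins" is damped to 0.75
print(blend_q(0.0))  # 0.25 -- a confident "black loses" is damped to 0.25
print(blend_q(0.5))  # 0.5  -- neutral stays neutral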
Example 3
def expand_policy_network(board: Board) -> List[Tuple[int, float]]:
    board_array = board.get_board_array()
    probability = TreePolicyRunner(board_array)

    # Second move after a centre (7, 7) opening: restrict to the predefined candidate set.
    if len(board.moved) == 1:
        if board.moved[0] == move_xy2int(7, 7):
            return list(map(lambda move: (move, probability[move]), __second_move_available))
    # Forced moves for the current player take priority, then forced moves of either side.
    if len(board.must[board.current_player]):
        return list(map(lambda move: (move, probability[move]), (board.must[board.current_player])))
    if len(board.must[0] | board.must[1]):
        return list(map(lambda move: (move, probability[move]), (board.must[0] | board.must[1])))

    # Otherwise expand only cells within the wide neighborhood of existing stones.
    conv_available = _convolve_board_available_wide(board_array)
    return_list = []

    for move in board.available:
        if conv_available[move]:
            return_list.append((move, probability[move]))

    return return_list
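
The early returns above encode a priority order: the restricted candidate set for the second move after a centre opening, then the current player's forced moves, then forced moves of either side, and only then the wide neighborhood of existing stones. The coordinate helper this relies on is not shown in these examples; a hypothetical row-major version consistent with a 15x15 board could look like the sketch below (the project's real move_xy2int may use a different convention):

def move_xy2int(x: int, y: int, size: int = 15) -> int:
    # Hypothetical row-major flattening: the board centre (7, 7) maps to 112.
    return x * size + y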
Example 4
def rollout_policy_network(board: Board):
    probability = RolloutPolicyRunner(board.get_board_array())

    return probability
Example 5
def rollout_policy_random(board: Board):
    board_array = board.get_board_array()
    conv_available = _convolve_board_available_narrow(board_array)
    # Random scores masked to the neighborhood; note the result is not normalized.
    probability = np.random.rand(15 * 15) * conv_available

    return probability
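
Unlike expand_policy_random, this returns an unnormalized vector: random noise masked to the narrow neighborhood. A caller that samples from it with np.random.choice(..., p=...) would have to renormalize first, as in this small sketch with a hypothetical mask:

import numpy as np

conv_available = np.zeros(15 * 15)
conv_available[[96, 97, 112, 113]] = 1.0  # hypothetical neighborhood mask

probability = np.random.rand(15 * 15) * conv_available
probability = probability / probability.sum()  # normalize before sampling
next_move = np.random.choice(15 * 15, p=probability)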