コード例 #1
0
ファイル: mcts.py プロジェクト: wrongu/QuoridorV2
 def __init__(self, game_state:Quoridor, policy_output, value_output):
     """Build a node for ``game_state`` from the given policy and value outputs.

     The node keeps the supplied ``policy_output`` and ``value_output``
     untouched, snapshots the state's legal-move mask, current player and
     hash key, and starts with zeroed per-action statistics and no children.
     """
     # Outputs handed in by the caller; stored as-is.
     self._policy = policy_output
     self._value = value_output

     # Per-action statistics share one layout: a tensor spanning every possible
     # action, meant to be masked later with the set of legal actions.
     plane_shape = (3, 9, 9)
     # How many times each action has been taken *from this state*; all zero at first.
     self._counts = torch.zeros(plane_shape)
     # Same layout as the counts, also zero-initialised
     # (presumably the summed reward per action -- confirm against the update code).
     self._total_reward = torch.zeros(plane_shape)

     # Snapshot what we need from the game state: legal-action planes for the
     # player to move, that player, and the state's hash key.
     legal_moves = game_state.all_legal_moves()
     self._legal_mask = encode_actions_to_planes(legal_moves, game_state.current_player)
     self._player = game_state.current_player
     self._key = game_state.hash_key()

     # No children yet; the private flag starts cleared.
     self._children = dict()
     self.__flagged = False
コード例 #2
0
                              col) + "v"

    if temperature < 1e-6:
        # Do max operation instead of unstable low-temperature manipulations
        idx = torch.argmax(policy_planes)
    else:
        idx = torch.multinomial(policy_planes.flatten()**temperature,
                                num_samples=1)
    return _idx_to_action(idx.item())


if __name__ == '__main__':
    # mini test
    q = Quoridor()

    legal_moves = q.all_legal_moves(partial_check=False)
    print("INITIAL STATE LEGAL MOVES ({} of them):".format(len(legal_moves)))
    print(legal_moves)

    for mv in legal_moves:
        planes = encode_actions_to_planes(mv, q.current_player)
        print("=========== {} ============".format(mv))
        print(planes)
        mv2 = sample_action(planes, 0)
        print(mv2)
        assert mv2 == mv, "Failed to encode/decode {}".format(mv)

    # Test that just sampling random moves leads to some illegal moves getting selected (this is expected)
    random_actions, masked_random_actions = [''] * 100, [''] * 100
    legal_mask = encode_actions_to_planes(legal_moves, q.current_player)
    for i in range(100):