Ejemplo n.º 1
0
def test_expected_utility():
    """Train on a three-card deck, then check exploitability and utilities.

    Calls ``learn`` with 10000 as its first argument on three cards,
    asserts that ``exploitability`` then reports a value below 0.001, and
    finally checks the result of ``expected_utility``: its entries must sum
    to (approximately) zero, the first two entries must have equal
    magnitude, and the vector must not be identically zero.
    """
    num_players = 2
    # One empty dict per player index; both maps are handed to learn() and
    # to the evaluation helpers below.
    node_map = {i: {} for i in range(num_players)}
    action_map = {i: {} for i in range(num_players)}
    cards = [Card(14, 1), Card(13, 1), Card(12, 1)]
    learn(10000, cards, 2, node_map, action_map)

    exploit = exploitability(cards, 2, node_map, action_map)
    print(exploit)

    assert exploit < .001, f"Exploitability was : {exploit}"

    # NOTE(review): util is presumably a NumPy array indexed by player
    # (it supports .sum() and integer indexing) -- confirm against
    # expected_utility().
    util = expected_utility(cards, 2, 2, node_map, action_map)
    print(util)
    print(json.dumps(action_map, indent=4))
    print(node_map)

    # Compare the floating-point sum against zero with a tolerance; an exact
    # `== 0` can fail from rounding error alone.
    assert np.isclose(util.sum(), 0), f"Util was {util}"
    assert np.isclose(np.abs(util[0]), np.abs(util[1])), f"Util was {util}"
    assert np.abs(util).sum() > 0, f"Util was {util}"
Ejemplo n.º 2
0
        regret = util[action] - node_util[state.turn]
        node.regret_sum[action] += regret * reach_prob

    return node_util


if __name__ == '__main__':
    # Script entry point: train on a six-card deck, then print the
    # exploitability, every player's average strategy per information set,
    # and the expected utility.
    num_players = 2
    node_map = {player_idx: {} for player_idx in range(num_players)}
    action_map = {player_idx: {} for player_idx in range(num_players)}

    # Same six cards, in the same order, as spelling each Card(...) out:
    # first-argument values 14, 13, 12 for second-argument value 1, then 2.
    # NOTE(review): the arguments are presumably (rank, suit) -- confirm
    # against the Card definition.
    cards = [Card(rank, suit) for suit in (1, 2) for rank in (14, 13, 12)]

    learn(10000, cards, 3, node_map, action_map)
    exploit = exploitability(cards, 3, node_map, action_map)
    print(exploit)

    for player, info_sets in node_map.items():
        print(f"Player {player}")
        print('Number of info sets', len(info_sets))
        for info_set, node in info_sets.items():
            avg_strat = node.avg_strategy()
            print(f"{info_set}: {avg_strat}")

    util = expected_utility(cards, 3, 2, node_map, action_map)
    print(util)