Ejemplo n.º 1
0
def test_expected_utility_three_player():
    """Train three players on a four-card deck and sanity-check the utilities.

    After 20000 `learn` iterations the vector returned by `expected_utility`
    must sum to (approximately) zero and must not be all zeros.
    """
    player_count = 3
    node_map = {seat: {} for seat in range(player_count)}
    action_map = {seat: {} for seat in range(player_count)}
    cards = [Card(rank, 1) for rank in (14, 13, 12, 11)]
    learn(20000, cards, 3, node_map, action_map)

    util = expected_utility(cards, 3, 3, kuhn_eval, node_map, action_map)

    # Dump the results so a failing run can be diagnosed from captured output.
    print(util)
    print(json.dumps(action_map, indent=4))
    print(node_map)

    net_utility = util.sum()
    assert abs(net_utility) <= 0.0001, f"Something weird, not a zero sum game"

    total_magnitude = np.abs(util).sum()
    assert total_magnitude > 0, f"Util was {util}"
Ejemplo n.º 2
0
def test_expected_utility():
    """Train two players on a three-card deck and check the expected utility.

    Checks that the utilities sum to zero (within a small tolerance), that the
    two players' utilities have equal magnitude, that they are not all zero,
    and that the second player's utility is within 0.01 of 1/18.
    """
    num_players = 2
    node_map = {i: {} for i in range(num_players)}
    action_map = {i: {} for i in range(num_players)}
    cards = [Card(14, 1), Card(13, 1), Card(12, 1)]
    learn(20000, cards, 2, node_map, action_map)

    util = expected_utility(cards, 2, 2, kuhn_eval, node_map, action_map)

    # Dump the results so a failing run can be diagnosed from captured output.
    print(util)
    print(json.dumps(action_map, indent=4))
    print(node_map)

    # The utilities are floating-point values, so compare against zero with a
    # tolerance instead of exact equality (which can fail on rounding error).
    assert abs(util.sum()) <= 0.0001, f"Not a zero sum game, util was {util}"
    assert np.isclose(np.abs(util[0]), np.abs(util[1])), f"Util was {util}"
    assert np.abs(util).sum() > 0, f"Util was {util}"
    assert abs(util[1] - 1 / 18) <= .01, f"Util not converging {util}"
Ejemplo n.º 3
0
def learn(iterations, node_map, action_map):
    """Run `iterations` rounds of regret accumulation.

    The player count is taken from `len(node_map)` and the deck holds one more
    card than there are players (ranks 14 downward, suit 1). Each round
    shuffles the deck in place and then, once per player, builds a fresh
    `State` and calls `accumulate_regrets` with reach probabilities of one.

    Args:
        iterations: number of shuffle-and-traverse rounds to run.
        node_map: per-player mapping passed through to `accumulate_regrets`.
        action_map: per-player mapping passed through to `accumulate_regrets`.
    """
    player_count = len(node_map)
    deck = [Card(14 - offset, 1) for offset in range(player_count + 1)]

    for _ in tqdm(range(iterations), desc="learning"):
        np.random.shuffle(deck)
        # One traversal per player on the same shuffled deck.
        for _seat in range(player_count):
            fresh_state = State(deck, player_count, 1, kuhn_eval)
            reach_probs = np.ones(player_count)
            accumulate_regrets(fresh_state, node_map, action_map, reach_probs)
Ejemplo n.º 4
0
def test_update_strategy():
    """Check that `update_strategy` accumulates strategy mass on visited nodes.

    Two nodes with hand-set regret sums are registered for player 0 under the
    info-set keys 'As || [[]]' and "As || [['C', '1R']]". After calling
    `update_strategy` for player 0, both nodes must have a positive
    `strategy_sum` total.
    """
    num_players = 2
    node_map = {i: {} for i in range(num_players)}
    action_map = {i: {} for i in range(num_players)}
    n1 = Node(['F', 'C', '1R'])
    n1.regret_sum = {'F': 0, 'C': 1, '1R': 0}

    n2 = Node(['F', 'C', '1R'])
    n2.regret_sum = {'F': 1, 'C': 0, '1R': 1}

    node_map[0]['As || [[]]'] = n1
    node_map[0]["As || [['C', '1R']]"] = n2
    cards = [Card(14, 1), Card(13, 1), Card(12, 1)]
    state = State(cards, num_players, 1, kuhn_eval)

    update_strategy(0, state, node_map, action_map)

    assert sum(n1.strategy_sum.values()) > 0, f'Util\n{n1}, \nNodes\n{node_map}, Actions\n{json.dumps(action_map, indent=4)}'
    # Report n2 here (the original message printed n1, hiding the failing node).
    assert sum(n2.strategy_sum.values()) > 0, f'Util\n{n2}, \nNodes\n{node_map}, Actions\n{json.dumps(action_map, indent=4)}'
Ejemplo n.º 5
0
def test_expected_utility():
    """Train two players on a three-card deck; check exploitability and utility.

    After 10000 `learn` iterations the exploitability must be below 0.001, and
    the expected-utility vector must sum to zero (within a small tolerance),
    have equal magnitude for both players, and not be all zeros.
    """
    num_players = 2
    node_map = {i: {} for i in range(num_players)}
    action_map = {i: {} for i in range(num_players)}
    cards = [Card(14, 1), Card(13, 1), Card(12, 1)]
    learn(10000, cards, 2, node_map, action_map)

    exploit = exploitability(cards, 2, node_map, action_map)
    print(exploit)

    assert exploit < .001, f"Exploitability was : {exploit}"

    util = expected_utility(cards, 2, 2, node_map, action_map)
    # Dump the results so a failing run can be diagnosed from captured output.
    print(util)
    print(json.dumps(action_map, indent=4))
    print(node_map)

    # The utilities are floating-point values, so compare against zero with a
    # tolerance instead of exact equality (which can fail on rounding error).
    assert abs(util.sum()) <= 0.0001, f"Not a zero sum game, util was {util}"
    assert np.isclose(np.abs(util[0]), np.abs(util[1])), f"Util was {util}"
    assert np.abs(util).sum() > 0, f"Util was {util}"
Ejemplo n.º 6
0
def test_leduc_showdown():
    """Play a two-player Leduc hand to its end and check the final payoff.

    The action sequence is '2R', '2R', 'C' (after which the state must be in
    round 1 with player 0 to act), then '4R' and 'C'. The resulting state must
    be terminal with utility [9, -9].
    """
    cards = [Card(rank, suit) for suit in (1, 2) for rank in (14, 13, 12)]

    state = Leduc(cards, 2, leduc_eval)

    for opening_action in ('2R', '2R', 'C'):
        state = state.take(opening_action, deep=True)

    assert state.round == 1 and state.turn == 0, f'{state.round, state.turn}'

    state = state.take('4R', deep=True)

    assert state.turn == 1, state.turn

    state = state.take('C', deep=True)

    expected_payoff = np.array([9, -9])
    assert state.terminal is True and np.array_equal(
        state.utility(), expected_payoff), f'{state.utility(), state.cards}'
Ejemplo n.º 7
0
def test_leduc_state():
    """Step a two-player Leduc state and check turn/round bookkeeping.

    Player 0 acts first. The actions 'C', '2R', 'C' are applied in order and
    the expected (turn, round) is asserted after each one; a final 'F' must
    end the hand with utility [-3, 3].
    """
    cards = [Card(rank, suit) for suit in (1, 2) for rank in (14, 13, 12)]

    state = Leduc(cards, 2, leduc_eval)

    assert state.turn == 0, state.turn

    # (action to take, expected turn afterwards, expected round afterwards)
    expected_progress = (
        ('C', 1, 0),
        ('2R', 0, 0),
        ('C', 0, 1),
    )
    for action, turn_after, round_after in expected_progress:
        state = state.take(action, deep=True)
        assert state.turn == turn_after and state.round == round_after, f'{state.turn, state.round}'

    state = state.take('F', deep=True)
    expected_payoff = np.array([-3, 3])
    assert state.terminal is True and np.array_equal(
        state.utility(), expected_payoff), f'{state.terminal, state.utility()}'
Ejemplo n.º 8
0
def test_hand_eval():
    """`leduc_eval` of Card(14, 1) against [Card(14, 2)] must score 224."""
    cards = [Card(rank, suit) for suit in (1, 2) for rank in (14, 13, 12)]

    # Index 0 and index 3 are the two rank-14 cards (suits 1 and 2).
    hand_card = cards[0]
    other_cards = [cards[3]]

    assert leduc_eval(hand_card, other_cards) == 224
Ejemplo n.º 9
0
def test_kuhn_utility():
    """Check the utility vector after two fixed action sequences.

    On a fresh two-player state over the cards 14, 13, 12 (suit 1):
    'C', 'C' must yield [1, -1] and '1R', 'C' must yield [2, -2].
    """
    # (actions applied in place, expected utility vector)
    scenarios = (
        (('C', 'C'), [1, -1]),
        (('1R', 'C'), [2, -2]),
    )

    for actions, expected in scenarios:
        cards = [Card(rank, 1) for rank in (14, 13, 12)]
        state = State(cards, 2, kuhn_eval)

        for action in actions:
            state.take(action)

        utility = state.utility()

        assert np.array_equal(utility, np.array(expected)), utility
Ejemplo n.º 10
0
    node = node_map[hand.turn][info_set]

    strategy = node.avg_strategy()
    util = np.zeros(len(node_map))
    valid_actions = action_map[hand.turn][info_set]
    for action in valid_actions:
        new_hand = hand.take(action, deep=True)
        util += traverse_tree(new_hand, node_map, action_map) * strategy[action]

    return util


if __name__ == '__main__':
    # Script entry point: train, then print the expected utility and the
    # learned maps. An optional first CLI argument sets the number of players
    # (default 2).
    if len(sys.argv) > 1:
        try:
            num_players = int(sys.argv[1])
        except ValueError as err:
            # Chain explicitly so the original parse error stays attached.
            raise ValueError("must pass an int for number of players") from err
    else:
        num_players = 2

    node_map = {i: {} for i in range(num_players)}
    action_map = {i: {} for i in range(num_players)}
    learn(10000, node_map, action_map)

    # Size the deck and the evaluation from num_players instead of hard-coding
    # the two-player case; for the default of 2 this is the same three-card
    # deck and the same (2, 2) arguments as before.
    cards = [Card(14 - i, 1) for i in range(num_players + 1)]
    util = expected_utility(cards, num_players, num_players, kuhn_eval,
                            node_map, action_map)
    print(util)
    print(node_map)
    print(json.dumps(action_map, indent=4))
Ejemplo n.º 11
0
        if p != state.turn:
            reach_prob *= prob

    for action in valid_actions:
        regret = util[action] - node_util[state.turn]
        node.regret_sum[action] += regret * reach_prob

    return node_util


if __name__ == '__main__':
    # Script entry point: train two players on a six-card deck (ranks 14, 13,
    # 12 in suits 1 and 2), print the exploitability, then walk every player's
    # info sets.
    num_players = 2
    node_map = {i: {} for i in range(num_players)}
    action_map = {i: {} for i in range(num_players)}
    cards = [
        Card(14, 1),
        Card(13, 1),
        Card(12, 1),
        Card(14, 2),
        Card(13, 2),
        Card(12, 2)
    ]
    # NOTE(review): the literal 3 passed to learn/exploitability is presumably
    # the number of cards in play per hand — confirm against their signatures.
    learn(10000, cards, 3, node_map, action_map)
    exploit = exploitability(cards, 3, node_map, action_map)
    print(exploit)

    for player in node_map:
        print(f"Player {player}")
        print('Number of info sets', len(node_map[player]))
        for info_set, node in node_map[player].items():
            # NOTE(review): avg_strat is not used in the visible lines; this
            # excerpt appears to end before the value is consumed.
            avg_strat = node.avg_strategy()
Ejemplo n.º 12
0
    util = np.zeros(len(node_map))
    valid_actions = action_map[hand.turn][info_set]
    for action in valid_actions:
        new_hand = hand.take(action, deep=True)
        util += traverse_tree(new_hand, node_map,
                              action_map) * strategy[action]

    return util


if __name__ == '__main__':
    # Script entry point: train, then print the exploitability, the expected
    # utility and the learned maps. An optional first CLI argument sets the
    # number of players (default 2).
    if len(sys.argv) > 1:
        try:
            num_players = int(sys.argv[1])
        except ValueError as err:
            # Chain explicitly so the original parse error stays attached.
            raise ValueError("must pass an int for number of players") from err
    else:
        num_players = 2

    node_map = {i: {} for i in range(num_players)}
    action_map = {i: {} for i in range(num_players)}
    cards = [Card(14 - i, 1) for i in range(num_players + 1)]
    learn(10000, cards, num_players, node_map, action_map)

    # Evaluate with the same player count that was used for training instead
    # of a hard-coded 2; the default run (num_players == 2) is unchanged.
    # NOTE(review): assumes exploitability/expected_utility accept the same
    # count argument that learn receives — confirm for more than two players.
    exploit = exploitability(cards, num_players, node_map, action_map)
    print(exploit)

    util = expected_utility(cards, num_players, num_players, node_map,
                            action_map)
    print(util)
    print(node_map)
    print(json.dumps(action_map, indent=4))
Ejemplo n.º 13
0
        return node_util

    else:
        actions = list(strategy.keys())
        probs = list(strategy.values())
        random_action = actions[np.random.choice(len(actions), p=probs)]
        new_state = state.take(random_action, deep=True)
        return accumulate_regrets(traverser, new_state, node_map, action_map,
                                  prune=prune)


if __name__ == '__main__':
    # Script entry point: train two players on a six-card deck (ranks 14, 13,
    # 12 in suits 1 and 2), then report the exploitability, every info set's
    # average strategy, and the expected utility.
    num_players = 2
    node_map = {seat: {} for seat in range(num_players)}
    action_map = {seat: {} for seat in range(num_players)}
    cards = [Card(rank, suit) for suit in (1, 2) for rank in (14, 13, 12)]

    learn(50000, cards, 3, node_map, action_map)
    exploit = exploitability(cards, 3, node_map, action_map)
    print(exploit)

    # Per-player summary of the learned average strategies.
    for player, player_nodes in node_map.items():
        print(f"Player {player}")
        print('Number of info sets', len(player_nodes))
        for info_set, node in player_nodes.items():
            avg_strat = node.avg_strategy()
            print(f"{info_set}: {avg_strat}")

    util = expected_utility(cards, 3, 2, node_map, action_map)
    print(util)