def test_expected_utility_three_player():
    """Three-player Kuhn: after training, expected utilities should be
    (approximately) zero-sum across players and non-trivial.

    Fix: the zero-sum assertion message was an f-string with no
    placeholders; it now reports the offending utilities.
    """
    num_players = 3
    node_map = {i: {} for i in range(num_players)}
    action_map = {i: {} for i in range(num_players)}
    # Four-card deck for three players: one card per player plus one dead card.
    cards = [Card(14, 1), Card(13, 1), Card(12, 1), Card(11, 1)]
    learn(20000, cards, 3, node_map, action_map)
    util = expected_utility(cards, 3, 3, kuhn_eval, node_map, action_map)
    print(util)
    print(json.dumps(action_map, indent=4))
    print(node_map)
    # Zero-sum: per-player expected utilities must cancel (within tolerance).
    assert abs(util.sum()) <= 0.0001, f"Not a zero-sum game, utilities were {util}"
    # Sanity: the strategy profile must produce non-zero payoffs somewhere.
    assert np.abs(util).sum() > 0, f"Util was {util}"
def test_expected_utility():
    """Two-player Kuhn: utilities must be zero-sum, symmetric, non-trivial,
    and converge toward the known game value (player 2 wins 1/18 per hand).

    Fix: `util.sum() == 0` was an exact equality test on a numpy float
    sum, which is fragile; replaced with `np.isclose`.
    """
    num_players = 2
    node_map = {i: {} for i in range(num_players)}
    action_map = {i: {} for i in range(num_players)}
    cards = [Card(14, 1), Card(13, 1), Card(12, 1)]
    learn(20000, cards, 2, node_map, action_map)
    util = expected_utility(cards, 2, 2, kuhn_eval, node_map, action_map)
    print(util)
    print(json.dumps(action_map, indent=4))
    print(node_map)
    # Zero-sum within floating-point tolerance rather than exact equality.
    assert np.isclose(util.sum(), 0), f"Not zero-sum, util was {util}"
    assert np.isclose(np.abs(util[0]), np.abs(util[1])), f"Util was {util}"
    assert np.abs(util).sum() > 0, f"Util was {util}"
    # Known equilibrium value of Kuhn poker: +1/18 for the second player.
    assert abs(util[1] - 1 / 18) <= .01, f"Util not converging {util}"
def learn(iterations, node_map, action_map):
    """Train Kuhn-poker strategies by repeated CFR traversals.

    The deck holds one card per player plus a single dead card
    (ranks descending from the ace). Each iteration shuffles the deck
    and runs one full regret-accumulation pass per player seat.
    NOTE(review): every pass in the inner loop traverses from an
    identical root state — presumably intentional for this CFR
    variant; confirm against `accumulate_regrets`.

    Args:
        iterations: number of shuffled deals to train on.
        node_map: per-player dict of info-set -> Node (mutated in place).
        action_map: per-player dict of info-set -> legal actions (mutated in place).
    """
    num_players = len(node_map)
    deck = [Card(14 - offset, 1) for offset in range(num_players + 1)]
    for _ in tqdm(range(iterations), desc="learning"):
        np.random.shuffle(deck)
        for _seat in range(num_players):
            root = State(deck, num_players, 1, kuhn_eval)
            reach_probs = np.ones(num_players)
            accumulate_regrets(root, node_map, action_map, reach_probs)
def test_update_strategy():
    """`update_strategy` must accumulate positive strategy mass on both
    pre-seeded info-set nodes for player 0.

    Fix: the second assertion's failure message printed `n1` instead of
    the node actually under test, `n2`.
    """
    num_players = 2
    node_map = {i: {} for i in range(num_players)}
    action_map = {i: {} for i in range(num_players)}
    # Seed two info sets with hand-crafted regret sums so the walk has
    # non-uniform strategies to accumulate.
    n1 = Node(['F', 'C', '1R'])
    n1.regret_sum = {'F': 0, 'C': 1, '1R': 0}
    n2 = Node(['F', 'C', '1R'])
    n2.regret_sum = {'F': 1, 'C': 0, '1R': 1}
    node_map[0]['As || [[]]'] = n1
    node_map[0]["As || [['C', '1R']]"] = n2
    cards = [Card(14, 1), Card(13, 1), Card(12, 1)]
    state = State(cards, num_players, 1, kuhn_eval)
    update_strategy(0, state, node_map, action_map)
    assert sum(n1.strategy_sum.values()) > 0, f'Util\n{n1}, \nNodes\n{node_map}, Actions\n{json.dumps(action_map, indent=4)}'
    assert sum(n2.strategy_sum.values()) > 0, f'Util\n{n2}, \nNodes\n{node_map}, Actions\n{json.dumps(action_map, indent=4)}'
def test_expected_utility():
    """Two-player Kuhn: trained strategy must be near-unexploitable and
    its expected utilities zero-sum and symmetric.

    Fix: `util.sum() == 0` was an exact equality test on a numpy float
    sum, which is fragile; replaced with `np.isclose`.
    """
    num_players = 2
    node_map = {i: {} for i in range(num_players)}
    action_map = {i: {} for i in range(num_players)}
    cards = [Card(14, 1), Card(13, 1), Card(12, 1)]
    learn(10000, cards, 2, node_map, action_map)
    exploit = exploitability(cards, 2, node_map, action_map)
    print(exploit)
    assert exploit < .001, f"Exploitability was : {exploit}"
    util = expected_utility(cards, 2, 2, node_map, action_map)
    print(util)
    print(json.dumps(action_map, indent=4))
    print(node_map)
    # Zero-sum within floating-point tolerance rather than exact equality.
    assert np.isclose(util.sum(), 0), f"Not zero-sum, util was {util}"
    assert np.isclose(np.abs(util[0]), np.abs(util[1])), f"Util was {util}"
    assert np.abs(util).sum() > 0, f"Util was {util}"
def test_leduc_showdown():
    """Play a raise/re-raise/call first round, then raise/call, and check
    the showdown reaches a terminal state paying 9 to player 0."""
    # Six-card Leduc deck: A/K/Q in each of two suits.
    deck = [Card(rank, suit) for suit in (1, 2) for rank in (14, 13, 12)]
    state = Leduc(deck, 2, leduc_eval)
    # Round 0: raise, re-raise, call.
    for action in ('2R', '2R', 'C'):
        state = state.take(action, deep=True)
    assert state.round == 1 and state.turn == 0, f'{state.round, state.turn}'
    # Round 1: raise then call ends the hand at showdown.
    state = state.take('4R', deep=True)
    assert state.turn == 1, state.turn
    state = state.take('C', deep=True)
    assert state.terminal is True and np.array_equal(
        state.utility(), np.array([9, -9])), f'{state.utility(), state.cards}'
def test_leduc_state():
    """Walk a check/raise/call first round then a fold, verifying whose
    turn it is and which betting round is active after each action."""
    # Six-card Leduc deck: A/K/Q in each of two suits.
    deck = [Card(rank, suit) for suit in (1, 2) for rank in (14, 13, 12)]
    state = Leduc(deck, 2, leduc_eval)
    assert state.turn == 0, state.turn
    # (action, expected turn after it, expected round after it)
    for action, want_turn, want_round in (('C', 1, 0), ('2R', 0, 0), ('C', 0, 1)):
        state = state.take(action, deep=True)
        assert state.turn == want_turn and state.round == want_round, \
            f'{state.turn, state.round}'
    # Folding in round 1 ends the hand: folder loses the 3 chips in the pot.
    state = state.take('F', deep=True)
    assert state.terminal is True and np.array_equal(
        state.utility(), np.array([-3, 3])), f'{state.terminal, state.utility()}'
def test_hand_eval():
    """`leduc_eval` must score a paired ace (hole ace + board ace) as 224."""
    # Six-card Leduc deck: A/K/Q in each of two suits.
    deck = [Card(rank, suit) for suit in (1, 2) for rank in (14, 13, 12)]
    hole, board = deck[0], deck[3]  # ace of suit 1, ace of suit 2
    assert leduc_eval(hole, [board]) == 224
def test_kuhn_utility():
    """Terminal Kuhn payoffs: check-check pays 1 to the stronger hand,
    bet-call pays 2. Deal order fixes player 0 with the ace."""
    # (player 0's opening action, expected payoff vector after player 1 calls)
    for opening, payoff in (('C', [1, -1]), ('1R', [2, -2])):
        state = State([Card(14, 1), Card(13, 1), Card(12, 1)], 2, kuhn_eval)
        state.take(opening)
        state.take('C')
        utility = state.utility()
        assert np.array_equal(utility, np.array(payoff)), utility
node = node_map[hand.turn][info_set] strategy = node.avg_strategy() util = np.zeros(len(node_map)) valid_actions = action_map[hand.turn][info_set] for action in valid_actions: new_hand = hand.take(action, deep=True) util += traverse_tree(new_hand, node_map, action_map) * strategy[action] return util if __name__ == '__main__': if len(sys.argv) > 1: try: num_players = int(sys.argv[1]) except ValueError: raise ValueError("must pass an int for number of players") else: num_players = 2 node_map = {i: {} for i in range(num_players)} action_map = {i: {} for i in range(num_players)} learn(10000, node_map, action_map) cards = [Card(14, 1), Card(13, 1), Card(12, 1)] util = expected_utility(cards, 2, 2, kuhn_eval, node_map, action_map) print(util) print(node_map) print(json.dumps(action_map, indent=4))
if p != state.turn: reach_prob *= prob for action in valid_actions: regret = util[action] - node_util[state.turn] node.regret_sum[action] += regret * reach_prob return node_util if __name__ == '__main__': num_players = 2 node_map = {i: {} for i in range(num_players)} action_map = {i: {} for i in range(num_players)} cards = [ Card(14, 1), Card(13, 1), Card(12, 1), Card(14, 2), Card(13, 2), Card(12, 2) ] learn(10000, cards, 3, node_map, action_map) exploit = exploitability(cards, 3, node_map, action_map) print(exploit) for player in node_map: print(f"Player {player}") print('Number of info sets', len(node_map[player])) for info_set, node in node_map[player].items(): avg_strat = node.avg_strategy()
util = np.zeros(len(node_map)) valid_actions = action_map[hand.turn][info_set] for action in valid_actions: new_hand = hand.take(action, deep=True) util += traverse_tree(new_hand, node_map, action_map) * strategy[action] return util if __name__ == '__main__': if len(sys.argv) > 1: try: num_players = int(sys.argv[1]) except ValueError: raise ValueError("must pass an int for number of players") else: num_players = 2 node_map = {i: {} for i in range(num_players)} action_map = {i: {} for i in range(num_players)} cards = [Card(14 - i, 1) for i in range(num_players + 1)] learn(10000, cards, num_players, node_map, action_map) exploit = exploitability(cards, 2, node_map, action_map) print(exploit) util = expected_utility(cards, 2, 2, node_map, action_map) print(util) print(node_map) print(json.dumps(action_map, indent=4))
return node_util else: actions = list(strategy.keys()) probs = list(strategy.values()) random_action = actions[np.random.choice(len(actions), p=probs)] new_state = state.take(random_action, deep=True) return accumulate_regrets(traverser, new_state, node_map, action_map, prune=prune) if __name__ == '__main__': num_players = 2 node_map = {i: {} for i in range(num_players)} action_map = {i: {} for i in range(num_players)} cards = [Card(14, 1), Card(13, 1), Card(12, 1), Card(14, 2), Card(13, 2), Card(12, 2)] learn(50000, cards, 3, node_map, action_map) exploit = exploitability(cards, 3, node_map, action_map) print(exploit) for player in node_map: print(f"Player {player}") print('Number of info sets', len(node_map[player])) for info_set, node in node_map[player].items(): avg_strat = node.avg_strategy() print(f"{info_set}: {avg_strat}") util = expected_utility(cards, 3, 2, node_map, action_map) print(util)