def test_turn():
    """Turn alternates between the two players after calls and raises."""
    s = State(list(range(6)), 2, None)
    assert s.turn == 0, f'State {s}'
    s = s.take('C', deep=True)
    assert s.turn == 1, f'State {s}'
    s = s.take('C', deep=True)
    assert s.turn == 0, f'State {s}'

    # Raises also pass the turn.
    s = State(list(range(6)), 2, None)
    s = s.take('2R', deep=True)
    assert s.turn == 1, f'State {s}'
    s = s.take('2R', deep=True)
    assert s.turn == 0, f'State {s}'

    # Mixed call/raise sequence.
    s = State(list(range(6)), 2, None)
    s = s.take('C', deep=True)
    assert s.turn == 1, f'State {s}'
    s = s.take('2R', deep=True)
    assert s.turn == 0, f'State {s}'
    s = s.take('2R', deep=True)
    assert s.turn == 1, f'State {s}'
def test_valid_actions():
    """Raise sizes escalate and cap out; calls close the round.

    Also checks that bets are charged to the correct player after each raise.
    """
    state = State(list(range(6)), 2, None)
    state.take('C')
    new_state = state.take('C', deep=True)
    assert (state.valid_actions() == ['F', 'C', '2R']
            and new_state.valid_actions() == ['F', 'C', '4R']), \
        f'Actions {state.valid_actions()}'

    state = State(list(range(6)), 2, None)
    state = state.take('2R', deep=True)
    assert state.players[0].bets == 3, f'Bet {state.players[0].bets}'
    assert state.valid_actions() == ['F', 'C', '2R'], \
        f'Actions {state.valid_actions()}'
    state = state.take('2R', deep=True)
    # BUG FIX: message previously printed players[0].bets while the assertion
    # checked players[1].bets, so a failure reported the wrong value.
    assert state.players[1].bets == 5, f'Bet {state.players[1].bets}'
    assert state.round == 0 and state.valid_actions() == ['F', 'C'], \
        f'Actions {state.valid_actions()}'
    state = state.take('C', deep=True)
    # Generator instead of a throwaway list inside all().
    assert all(p.bets == 5 for p in state.players), f'Bet {state.players}'
    assert state.round == 1, f'Round {state.round}'
def test_terminal_after_raise():
    """A call that answers an outstanding raise ends the hand."""
    st = State([1, 2, 3], 2, None)
    st = st.take('C', deep=True)
    st = st.take('1R', deep=True)
    assert st.terminal is False, st
    st = st.take('C', deep=True)
    assert st.terminal is True, st
def test_turn():
    """Acting on a hand that already ended with a fold raises ValueError."""
    game = State([1, 2, 3], 2, None)
    game.take('C')
    assert game.turn == 1

    game = State([1, 2, 3], 2, None)
    game.take('F')
    with pytest.raises(ValueError):
        game.take('C')
def test_turn():
    """Turn tracking in the multi-round State constructor variant."""
    game = State([1, 2, 3], 2, 1, None)
    game.take('C')
    assert game.turn == 1

    # After a fold the remaining actions still advance the turn marker.
    game = State([1, 2, 3], 2, 1, None)
    game.take('F')
    game.take('C')
    game.take('C')
    assert game.turn == 1
def learn(iterations, cards, num_cards, node_map, action_map):
    """Run MCCFR self-play with strategy snapshots, pruning, and LCFR discounting.

    Args:
        iterations: number of training iterations.
        cards: deck to deal from; decks larger than 4 cards use the Leduc state.
        num_cards: number of cards dealt per hand.
        node_map: per-player {infoset: Node} maps, mutated in place.
        action_map: per-player {infoset: actions} maps, mutated in place.
    """
    if len(cards) > 4:
        from leduc.state import Leduc as State
    else:
        from leduc.state import State

    all_combos = [list(t) for t in set(permutations(cards, num_cards))]
    num_players = len(node_map)

    for i in tqdm(range(1, iterations + 1), desc="learning"):
        card = np.random.choice(len(all_combos))
        for player in range(num_players):
            state = State(all_combos[card], num_players, kuhn_eval)
            if i % STRAT_INTERVAL == 0:
                update_strategy(player, state, node_map, action_map)

            if i > PRUNE_THRESH:
                # Past the threshold, do a full (unpruned) traversal only 5%
                # of the time; otherwise skip heavily-negative-regret branches.
                chance = np.random.rand()
                if chance < .05:
                    accumulate_regrets(player, state, node_map, action_map)
                else:
                    accumulate_regrets(player, state, node_map, action_map,
                                       prune=True)
            else:
                accumulate_regrets(player, state, node_map, action_map)

        if i < LCFR_INTERVAL and i % DISCOUNT == 0:
            # BUG FIX: the LCFR discount factor is (i/D) / (i/D + 1), as in the
            # Pluribus supplementary pseudocode. The previous expression,
            # (i/DISCOUNT)/(i/(DISCOUNT + 1)), algebraically reduces to the
            # constant (DISCOUNT+1)/DISCOUNT > 1 — it *amplified* old regrets
            # and strategy sums instead of discounting them.
            discounted = (i / DISCOUNT) / (i / DISCOUNT + 1)
            for player in node_map:
                player_nodes = node_map[player]
                for node in player_nodes.values():
                    node.regret_sum = {key: value * discounted
                                       for key, value in node.regret_sum.items()}
                    node.strategy_sum = {key: value * discounted
                                         for key, value in node.strategy_sum.items()}
def test_terminal():
    """Hand ends after a fold, or after the closing call of the last round."""
    state = State(list(range(6)), 2, None)
    state = state.take('F', deep=True)
    # Idiom/consistency fix: the sibling terminal tests assert with `is True`;
    # `== True` would also accept truthy non-bool values.
    assert state.terminal is True, f'State {state}'

    state = State(list(range(6)), 2, None)
    state = state.take('C', deep=True)
    state = state.take('C', deep=True)
    state = state.take('C', deep=True)
    state = state.take('C', deep=True)
    assert state.terminal is True, f'State {state}'
    # An over-sized raise on a finished hand is rejected.
    with pytest.raises(ValueError):
        state.take('1000R', deep=True)
def __init__(self, node_map, action_map, cards, num_cards):
    """Store the blueprint strategy and root the tree at a random deal."""
    self.blueprint = node_map
    self.action_map = action_map
    # Every distinct ordered deal of `num_cards` cards from the deck.
    distinct_deals = set(permutations(cards, num_cards))
    self.all_combos = [list(deal) for deal in distinct_deals]
    idx = np.random.choice(len(self.all_combos))
    self.root = State(self.all_combos[idx], len(node_map), eval)
def learn(iterations, node_map, action_map):
    """CFR training loop for Kuhn poker over shuffled deals."""
    num_players = len(node_map)
    # One more card than players, descending from the ace.
    deck = [Card(14 - i, 1) for i in range(num_players + 1)]
    for _ in tqdm(range(iterations), desc="learning"):
        np.random.shuffle(deck)
        for player in range(num_players):
            hand = State(deck, num_players, 1, kuhn_eval)
            reach = np.ones(num_players)
            accumulate_regrets(hand, node_map, action_map, reach)
def expected_utility(cards, num_cards, num_players, eval, node_map, action_map):
    """Average per-player utility over every distinct deal under the stored strategy."""
    cards = sorted(cards)
    deals = [list(t) for t in set(permutations(cards, num_cards))]
    totals = np.zeros(num_players)
    for deal in tqdm(deals, desc='calculating expected utility'):
        root = State(deal, num_players, 1, eval)
        totals += traverse_tree(root, node_map, action_map)
    return totals / len(deals)
def test_terminal():
    """One fold ends a 2-player hand; a 3-player hand needs two folds."""
    s = State([1, 2, 3], 2, None)
    assert s.terminal is False, s
    s.take('F')
    assert s.terminal is True, s

    s = State([1, 2, 3], 3, None)
    assert s.terminal is False, s
    s.take('F')
    assert s.terminal is False, s
    s.take('F')
    assert s.terminal is True, s
def learn(iterations, cards, num_cards, node_map, action_map):
    """CFR training loop over randomly sampled deals."""
    # Decks larger than 4 cards use the Leduc variant of State.
    if len(cards) > 4:
        from leduc.state import Leduc as State
    else:
        from leduc.state import State

    deals = [list(t) for t in set(permutations(cards, num_cards))]
    num_players = len(node_map)
    for _ in tqdm(range(iterations), desc="learning"):
        idx = np.random.choice(len(deals))
        for player in range(num_players):
            root = State(deals[idx], num_players, kuhn_eval)
            reach = np.ones(num_players)
            accumulate_regrets(root, node_map, action_map, reach)
def build_tree(cards, num_players):
    """Build the public-state map for the deck; returns (public_states, root)."""
    # Pick the game variant and evaluator that match the deck size.
    if len(cards) > 4:
        from leduc.state import Leduc as State
        from leduc.hand_eval import leduc_eval as eval
    else:
        from leduc.state import State
        from leduc.hand_eval import kuhn_eval as eval

    root = State(cards, num_players, eval)
    public_states = {}
    traverse_public(root, public_states)
    return public_states, root
def test_pot():
    """The pot grows by the raise amount and by the matching call."""
    s = State(list(range(6)), 2, None)
    # Antes only: one chip per player.
    assert sum(s.players) == s.num_players, sum(s.players)
    s = s.take('2R', deep=True)
    assert sum(s.players) == 4, sum(s.players)
    assert s.players[0].bets == 3, s.players[0].bets
    s = s.take('C', deep=True)
    assert sum(s.players) == 6, sum(s.players)
    assert s.players[1].bets == 3, s.players[1].bets
def expected_utility(cards, num_cards, num_players, node_map, action_map):
    """Average per-player utility over all deals, picking the game variant by deck size."""
    if len(cards) > 4:
        from leduc.state import Leduc as State
        from leduc.hand_eval import leduc_eval as eval
    else:
        from leduc.state import State
        from leduc.hand_eval import kuhn_eval as eval

    cards = sorted(cards)
    deals = [list(t) for t in set(permutations(cards, num_cards))]
    totals = np.zeros(num_players)
    for deal in tqdm(deals, desc='calculating expected utility'):
        root = State(deal, num_players, eval)
        totals += traverse_tree(root, node_map, action_map)
    return totals / len(deals)
def test_update_strategy():
    """update_strategy deposits positive strategy mass at both seeded infosets."""
    num_players = 2
    node_map = {p: {} for p in range(num_players)}
    action_map = {p: {} for p in range(num_players)}

    # Seed two infosets for player 0 with hand-written regrets.
    n1 = Node(['F', 'C', '1R'])
    n1.regret_sum = {'F': 0, 'C': 1, '1R': 0}
    n2 = Node(['F', 'C', '1R'])
    n2.regret_sum = {'F': 1, 'C': 0, '1R': 1}
    node_map[0]['As || [[]]'] = n1
    node_map[0]["As || [['C', '1R']]"] = n2

    cards = [Card(14, 1), Card(13, 1), Card(12, 1)]
    state = State(cards, num_players, 1, kuhn_eval)
    update_strategy(0, state, node_map, action_map)

    assert sum(n1.strategy_sum.values()) > 0, \
        f'Util\n{n1}, \nNodes\n{node_map}, Actions\n{json.dumps(action_map, indent=4)}'
    assert sum(n2.strategy_sum.values()) > 0, \
        f'Util\n{n1}, \nNodes\n{node_map}, Actions\n{json.dumps(action_map, indent=4)}'
def test_terminal_multiround():
    """The hand is terminal only after a fold or the last round fully closes."""
    s = State([1, 2, 3], 2, 1, None)
    assert s.terminal is False, s
    s.take('F')
    assert s.terminal is True, s

    # Two rounds: three calls leave the hand live, the fourth closes it.
    s = State([1, 2, 3], 2, 2, None)
    for action in ('C', 'C', 'C'):
        s.take(action)
        assert s.terminal is False, s
    s.take('C')
    assert s.terminal is True, s

    # An open raise in the final round keeps the hand live.
    s = State([1, 2, 3], 2, 2, None)
    for action in ('C', 'C', 'C', '1R'):
        s.take(action)
        assert s.terminal is False, s

    # Re-raise answered in the final round... still live until the call,
    # but a second raise here ends it.
    s = State([1, 2, 3], 2, 2, None)
    for action in ('C', 'C', 'C', '1R'):
        s.take(action)
        assert s.terminal is False, s
    s.take('1R')
    assert s.terminal is True, s
def test_valid_actions():
    """A raise removes further raising in the two-player Kuhn game."""
    game = State([1, 2, 3], 2, None)
    acts = game.valid_actions()
    assert acts == ['F', 'C', '1R'], acts
    game.take('C')
    acts = game.valid_actions()
    assert acts == ['F', 'C', '1R'], acts

    game = State([1, 2, 3], 2, None)
    game.take('1R')
    acts = game.valid_actions()
    assert acts == ['F', 'C'], acts
def test_kuhn_utility():
    """Winner takes 1 after check-check and 2 after a called raise."""
    deck = [Card(14, 1), Card(13, 1), Card(12, 1)]
    s = State(deck, 2, kuhn_eval)
    s.take('C')
    s.take('C')
    payoff = s.utility()
    assert np.array_equal(payoff, np.array([1, -1])), payoff

    deck = [Card(14, 1), Card(13, 1), Card(12, 1)]
    s = State(deck, 2, kuhn_eval)
    s.take('1R')
    s.take('C')
    payoff = s.utility()
    assert np.array_equal(payoff, np.array([2, -2])), payoff