Beispiel #1
0
def test_turn():
    """Check that ``state.turn`` flips between 0 and 1 after every action.

    Three action sequences are replayed, each on a freshly built state with
    six cards and two players: ``C, C``; ``2R, 2R``; and ``C, 2R, 2R``.
    """
    def replay(state, actions):
        # Apply the actions one by one (continuing from the state returned by
        # ``take(..., deep=True)``); after the k-th action (1-based) the turn
        # is expected to be ``k % 2``.
        for count, action in enumerate(actions, start=1):
            state = state.take(action, deep=True)
            assert state.turn == count % 2, f'State {state}'

    state = State(list(range(6)), 2, None)
    # Before any action has been taken the turn index must be 0.
    assert state.turn == 0, f'State {state}'
    replay(state, ('C', 'C'))

    for actions in (('2R', '2R'), ('C', '2R', '2R')):
        replay(State(list(range(6)), 2, None), actions)
Beispiel #2
0
def test_valid_actions():
    """Check ``valid_actions()``, per-player bets and round progression.

    Each assertion reports the value it actually checks, so a failure points
    at the offending state or player.
    """
    state = State(list(range(6)), 2, None)

    # The return value of the first call is discarded; only the result of
    # the second call (made with ``deep=True``) is kept as ``new_state``.
    state.take('C')
    new_state = state.take('C', deep=True)

    assert state.valid_actions() == ['F', 'C', '2R'], (
        f'Actions {state.valid_actions()}')
    assert new_state.valid_actions() == ['F', 'C', '4R'], (
        f'Actions {new_state.valid_actions()}')

    state = State(list(range(6)), 2, None)

    state = state.take('2R', deep=True)

    assert state.players[0].bets == 3, f'Bet {state.players[0].bets}'
    assert state.valid_actions() == ['F', 'C',
                                     '2R'], f'Actions {state.valid_actions()}'

    state = state.take('2R', deep=True)

    # Report the bet of the player being checked (index 1), not player 0.
    assert state.players[1].bets == 5, f'Bet {state.players[1].bets}'
    assert state.round == 0 and state.valid_actions() == [
        'F', 'C'
    ], f'Actions {state.valid_actions()}'

    state = state.take('C', deep=True)

    assert all(p.bets == 5 for p in state.players), f'Bet {state.players}'
    assert state.round == 1, f'Round {state.round}'
Beispiel #3
0
def test_terminal_after_raise():
    """Check ``terminal`` around a ``C``, ``1R``, ``C`` action sequence.

    After ``C`` followed by ``1R`` the state must not be terminal; the
    following ``C`` must make it terminal.
    """
    state = State([1, 2, 3], 2, None)

    for action in ('C', '1R'):
        state = state.take(action, deep=True)
    assert state.terminal is False, state

    state = state.take('C', deep=True)
    assert state.terminal is True, state
Beispiel #4
0
def test_turn():
    """Check the turn after one ``C`` and that acting after ``F`` raises."""
    # Scenario 1: a single 'C' on a fresh state moves the turn to index 1.
    after_call = State([1, 2, 3], 2, None)
    after_call.take('C')
    assert after_call.turn == 1

    # Scenario 2: once 'F' has been taken, a further 'C' must raise
    # ValueError.
    after_fold = State([1, 2, 3], 2, None)
    after_fold.take('F')
    with pytest.raises(ValueError):
        after_fold.take('C')
Beispiel #5
0
def test_turn():
    """Check ``turn`` after ``C`` and after the sequence ``F``, ``C``, ``C``.

    Both scenarios start from a fresh ``State([1, 2, 3], 2, 1, None)`` and
    expect the turn index to end at 1.
    """
    state = State([1, 2, 3], 2, 1, None)
    state.take('C')
    assert state.turn == 1

    state = State([1, 2, 3], 2, 1, None)
    for action in ('F', 'C', 'C'):
        state.take(action)
    assert state.turn == 1
Beispiel #6
0
def learn(iterations, cards, num_cards, node_map, action_map):
    """Run ``iterations`` training iterations, updating ``node_map`` in place.

    For every iteration one deal is drawn at random from all distinct
    ordered ``num_cards``-card deals of ``cards``.  For each player a fresh
    state is built from that deal, then:

    * every ``STRAT_INTERVAL`` iterations ``update_strategy`` is called;
    * ``accumulate_regrets`` is called -- after ``PRUNE_THRESH`` iterations
      it is called with ``prune=True`` unless a uniform random draw falls
      below 0.05.

    While ``i < LCFR_INTERVAL``, every ``DISCOUNT`` iterations all
    ``regret_sum`` and ``strategy_sum`` values of every node are multiplied
    by the discount factor ``d = (i / DISCOUNT) / (i / DISCOUNT + 1)``.

    Args:
        iterations: Number of iterations to run (counted from 1).
        cards: Cards to deal from; more than four cards selects the
            ``Leduc`` state class, otherwise the plain ``State`` class.
        num_cards: Number of cards in each deal.
        node_map: Mapping keyed by player whose values are mappings of
            nodes; its length is used as the number of players.
        action_map: Passed through to ``update_strategy`` and
            ``accumulate_regrets``.
    """
    if len(cards) > 4:
        from leduc.state import Leduc as State
    else:
        from leduc.state import State
    all_combos = [list(t) for t in set(permutations(cards, num_cards))]
    num_players = len(node_map)
    for i in tqdm(range(1, iterations + 1), desc="learning"):
        card = np.random.choice(len(all_combos))
        for player in range(num_players):
            # NOTE(review): ``kuhn_eval`` is passed even when the Leduc state
            # class was selected above -- confirm this is intended.
            state = State(all_combos[card], num_players, kuhn_eval)
            if i % STRAT_INTERVAL == 0:
                update_strategy(player, state, node_map, action_map)

            # The random draw only happens once pruning is enabled, so the
            # RNG stream is untouched during the first PRUNE_THRESH
            # iterations.
            prune = i > PRUNE_THRESH and np.random.rand() >= .05
            if prune:
                accumulate_regrets(player, state, node_map, action_map,
                                   prune=True)
            else:
                accumulate_regrets(player, state, node_map, action_map)

        if i < LCFR_INTERVAL and i % DISCOUNT == 0:
            # The "+ 1" belongs to the number of elapsed discount periods,
            # not to DISCOUNT itself: (i/DISCOUNT)/(i/(DISCOUNT + 1)) would
            # collapse to the constant (DISCOUNT + 1)/DISCOUNT > 1 and
            # inflate the sums instead of discounting them.
            periods = i / DISCOUNT
            discounted = periods / (periods + 1)
            for player_nodes in node_map.values():
                for node in player_nodes.values():
                    node.regret_sum = {
                        key: value * discounted
                        for key, value in node.regret_sum.items()
                    }
                    node.strategy_sum = {
                        key: value * discounted
                        for key, value in node.strategy_sum.items()
                    }
Beispiel #7
0
def test_terminal():
    """Check that ``F`` and four ``C`` actions each lead to a terminal state.

    Also checks that taking ``'1000R'`` on the terminal state raises
    ``ValueError``.
    """
    state = State(list(range(6)), 2, None)
    state = state.take('F', deep=True)
    # Equality (not identity) comparison is kept on purpose so that the
    # check behaves exactly as before for any truthy-equal value.
    assert state.terminal == True, f'State {state}'

    state = State(list(range(6)), 2, None)
    for _ in range(4):
        state = state.take('C', deep=True)
    assert state.terminal == True, f'State {state}'

    with pytest.raises(ValueError):
        state.take('1000R', deep=True)
Beispiel #8
0
    def __init__(self, node_map, action_map, cards, num_cards):
        """Store the given maps and build a root state from a random deal.

        Args:
            node_map: Stored as ``self.blueprint``; its length is passed to
                ``State`` as the second argument.
            action_map: Stored as ``self.action_map``.
            cards: Cards from which deals are formed.
            num_cards: Number of cards in each deal.
        """
        self.blueprint = node_map
        self.action_map = action_map

        # Every distinct ordered deal of ``num_cards`` cards, each as a list.
        distinct_deals = set(permutations(cards, num_cards))
        self.all_combos = [list(deal) for deal in distinct_deals]

        # Choose one deal by drawing a random index into ``all_combos``.
        picked = np.random.choice(len(self.all_combos))
        # NOTE(review): ``eval`` resolves to whatever the module binds to
        # that name -- the builtin unless a hand evaluator is imported under
        # this name elsewhere in the file; confirm.
        self.root = State(self.all_combos[picked], len(node_map), eval)
Beispiel #9
0
def learn(iterations, node_map, action_map):
    """Run ``iterations`` rounds of ``accumulate_regrets`` on shuffled decks.

    The deck holds ``num_players + 1`` cards, ``Card(14, 1)`` downwards.  It
    is shuffled in place once per iteration; then, once per player, a new
    state is built from it and passed to ``accumulate_regrets`` together
    with a fresh all-ones probability vector.

    Args:
        iterations: Number of iterations to run.
        node_map: Its length is used as the number of players; passed through
            to ``accumulate_regrets``.
        action_map: Passed through to ``accumulate_regrets``.
    """
    num_players = len(node_map)
    # Values 14, 13, ..., 14 - num_players for the first Card argument.
    deck = [Card(value, 1) for value in range(14, 13 - num_players, -1)]
    for _ in tqdm(range(iterations), desc="learning"):
        np.random.shuffle(deck)
        for _seat in range(num_players):
            root = State(deck, num_players, 1, kuhn_eval)
            reach = np.ones(num_players)
            accumulate_regrets(root, node_map, action_map, reach)
Beispiel #10
0
def expected_utility(cards, num_cards, num_players, eval, node_map, action_map):
    """Average the ``traverse_tree`` result over every distinct ordered deal.

    Args:
        cards: Cards to deal from (sorted before deals are generated).
        num_cards: Number of cards in each deal.
        num_players: Length of the returned vector; passed to ``State``.
        eval: Passed to ``State`` as its last argument.
        node_map: Passed through to ``traverse_tree``.
        action_map: Passed through to ``traverse_tree``.

    Returns:
        A NumPy array of length ``num_players``: the sum of the per-deal
        ``traverse_tree`` results divided by the number of deals.
    """
    ordered = sorted(cards)
    deals = [list(deal) for deal in set(permutations(ordered, num_cards))]

    total = np.zeros(num_players)
    for deal in tqdm(deals, desc='calculating expected utility'):
        root = State(deal, num_players, 1, eval)
        total += traverse_tree(root, node_map, action_map)

    return total/len(deals)
Beispiel #11
0
def test_terminal():
    """Check ``terminal`` after successive ``F`` actions for 2 and 3 players.

    A fresh state must not be terminal.  With two players a single ``F``
    must make it terminal; with three players the first ``F`` must not and
    the second must.
    """
    # (number of players, expected ``terminal`` value after each 'F')
    scenarios = (
        (2, (True,)),
        (3, (False, True)),
    )
    for num_players, outcomes in scenarios:
        state = State([1, 2, 3], num_players, None)
        assert state.terminal is False, state

        for expected in outcomes:
            state.take('F')
            assert state.terminal is expected, state
Beispiel #12
0
def learn(iterations, cards, num_cards, node_map, action_map):
    """Run ``iterations`` rounds of ``accumulate_regrets`` on random deals.

    Each iteration draws one deal at random from all distinct ordered
    ``num_cards``-card deals of ``cards``.  Then, once per player, a new
    state is built from that deal and passed to ``accumulate_regrets`` with
    a fresh all-ones probability vector.

    Args:
        iterations: Number of iterations to run.
        cards: Cards to deal from; more than four cards selects the
            ``Leduc`` state class, otherwise the plain ``State`` class.
        num_cards: Number of cards in each deal.
        node_map: Its length is used as the number of players; passed through
            to ``accumulate_regrets``.
        action_map: Passed through to ``accumulate_regrets``.
    """
    if len(cards) <= 4:
        from leduc.state import State
    else:
        from leduc.state import Leduc as State

    deals = [list(deal) for deal in set(permutations(cards, num_cards))]
    num_players = len(node_map)
    for _ in tqdm(range(iterations), desc="learning"):
        picked = np.random.choice(len(deals))
        for _seat in range(num_players):
            root = State(deals[picked], num_players, kuhn_eval)
            reach = np.ones(num_players)
            accumulate_regrets(root, node_map, action_map, reach)
Beispiel #13
0
def build_tree(cards, num_players):
    """Build a root state and fill a dict via ``traverse_public``.

    Args:
        cards: Cards for the root state; more than four cards selects the
            ``Leduc`` state class with ``leduc_eval``, otherwise the plain
            ``State`` class with ``kuhn_eval``.
        num_players: Passed to the state constructor.

    Returns:
        A ``(public_states, root_state)`` tuple, where ``public_states`` is
        the dict handed to ``traverse_public``.
    """
    if len(cards) <= 4:
        from leduc.state import State
        from leduc.hand_eval import kuhn_eval as hand_eval
    else:
        from leduc.state import Leduc as State
        from leduc.hand_eval import leduc_eval as hand_eval

    root = State(cards, num_players, hand_eval)
    public_states = {}
    traverse_public(root, public_states)
    return public_states, root
Beispiel #14
0
def test_pot():
    """Check the summed players value and individual bets after each action.

    On a fresh state ``sum(state.players)`` must equal the number of
    players.  After ``2R`` it must be 4 with player 0 at 3 bets; after the
    following ``C`` it must be 6 with player 1 at 3 bets.
    """
    state = State(list(range(6)), 2, None)
    assert sum(state.players) == state.num_players, sum(state.players)

    # (action, expected sum over players, index of the player whose bets
    # must equal 3 afterwards)
    steps = (
        ('2R', 4, 0),
        ('C', 6, 1),
    )
    for action, expected_sum, seat in steps:
        state = state.take(action, deep=True)
        assert sum(state.players) == expected_sum, sum(state.players)
        assert state.players[seat].bets == 3, state.players[seat].bets
Beispiel #15
0
def expected_utility(cards, num_cards, num_players, node_map, action_map):
    """Average the ``traverse_tree`` result over every distinct ordered deal.

    Args:
        cards: Cards to deal from (sorted before deals are generated); more
            than four cards selects the ``Leduc`` state class with
            ``leduc_eval``, otherwise the plain ``State`` class with
            ``kuhn_eval``.
        num_cards: Number of cards in each deal.
        num_players: Length of the returned vector; passed to the state
            constructor.
        node_map: Passed through to ``traverse_tree``.
        action_map: Passed through to ``traverse_tree``.

    Returns:
        A NumPy array of length ``num_players``: the sum of the per-deal
        ``traverse_tree`` results divided by the number of deals.
    """
    if len(cards) <= 4:
        from leduc.state import State
        from leduc.hand_eval import kuhn_eval as hand_eval
    else:
        from leduc.state import Leduc as State
        from leduc.hand_eval import leduc_eval as hand_eval

    ordered = sorted(cards)
    deals = [list(deal) for deal in set(permutations(ordered, num_cards))]

    total = np.zeros(num_players)
    for deal in tqdm(deals, desc='calculating expected utility'):
        root = State(deal, num_players, hand_eval)
        total += traverse_tree(root, node_map, action_map)

    return total / len(deals)
Beispiel #16
0
def test_update_strategy():
    """Check that ``update_strategy`` adds to ``strategy_sum`` on both nodes.

    Two nodes with preset ``regret_sum`` values are registered for player 0
    under the keys ``'As || [[]]'`` and ``"As || [['C', '1R']]"``.  After a
    single ``update_strategy`` call for player 0, each node's
    ``strategy_sum`` values must add up to something positive.
    """
    num_players = 2
    node_map = {i: {} for i in range(num_players)}
    action_map = {i: {} for i in range(num_players)}
    n1 = Node(['F', 'C', '1R'])
    n1.regret_sum = {'F': 0, 'C': 1, '1R': 0}

    n2 = Node(['F', 'C', '1R'])
    n2.regret_sum = {'F': 1, 'C': 0, '1R': 1}

    node_map[0]['As || [[]]'] = n1
    node_map[0]["As || [['C', '1R']]"] = n2
    cards = [Card(14, 1), Card(13, 1), Card(12, 1)]
    state = State(cards, num_players, 1, kuhn_eval)

    update_strategy(0, state, node_map, action_map)

    def describe(node):
        # Failure message for ``node``; only evaluated when its assertion
        # fails, so each assertion reports the node it actually checked.
        return (f'Util\n{node}, \nNodes\n{node_map}, '
                f'Actions\n{json.dumps(action_map, indent=4)}')

    assert sum(n1.strategy_sum.values()) > 0, describe(n1)
    assert sum(n2.strategy_sum.values()) > 0, describe(n2)
Beispiel #17
0
def test_terminal_multiround():
    """Check ``terminal`` after each action for states built with 1 and 2
    as the third ``State`` argument.

    With 1 as the third argument, a fresh state is not terminal and a single
    ``F`` makes it terminal.  With 2, three scripted action sequences are
    replayed and ``terminal`` is checked after every action.
    """
    state = State([1, 2, 3], 2, 1, None)
    assert state.terminal is False, state

    state.take('F')
    assert state.terminal is True, state

    # Each script is a sequence of (action, expected ``terminal`` afterwards).
    opening = (('C', False), ('C', False), ('C', False))
    scripts = (
        opening + (('C', True),),
        opening + (('1R', False),),
        opening + (('1R', False), ('1R', True)),
    )
    for script in scripts:
        state = State([1, 2, 3], 2, 2, None)
        for action, expected in script:
            state.take(action)
            assert state.terminal is expected, state
Beispiel #18
0
def test_valid_actions():
    """Check ``valid_actions()`` on a fresh state, after ``C`` and after
    ``1R``.

    A fresh state and a state after ``C`` must both offer
    ``['F', 'C', '1R']``; a state after ``1R`` must offer only
    ``['F', 'C']``.
    """
    def check(state, expected):
        # Query once so the failure message shows the list that was compared.
        actions = state.valid_actions()
        assert actions == expected, actions

    state = State([1, 2, 3], 2, None)
    check(state, ['F', 'C', '1R'])

    state.take('C')
    check(state, ['F', 'C', '1R'])

    state = State([1, 2, 3], 2, None)
    state.take('1R')
    check(state, ['F', 'C'])
Beispiel #19
0
def test_kuhn_utility():
    """Check ``utility()`` after two scripted action sequences.

    With cards ``Card(14, 1)``, ``Card(13, 1)``, ``Card(12, 1)`` and two
    players, the sequence ``C, C`` must yield ``[1, -1]`` and the sequence
    ``1R, C`` must yield ``[2, -2]``.
    """
    # (actions to take in order, expected utility vector)
    scenarios = (
        (('C', 'C'), [1, -1]),
        (('1R', 'C'), [2, -2]),
    )
    for actions, expected in scenarios:
        cards = [Card(14, 1), Card(13, 1), Card(12, 1)]
        state = State(cards, 2, kuhn_eval)

        for action in actions:
            state.take(action)

        utility = state.utility()
        assert np.array_equal(utility, np.array(expected)), utility