Ejemplo n.º 1
0
def test_turn():
    """Check the value of ``state.turn`` after each action of three scripts.

    Every script starts from a freshly built ``State`` and applies its
    actions through ``take(..., deep=True)``, asserting the expected turn
    after each one.
    """
    scripts = (
        (('C', 1), ('C', 0)),
        (('2R', 1), ('2R', 0)),
        (('C', 1), ('2R', 0), ('2R', 1)),
    )

    for script_index, script in enumerate(scripts):
        state = State(list(range(6)), 2, None)

        if script_index == 0:
            # Only the very first fresh state is checked before any action.
            assert state.turn == 0, f'State {state}'

        for action, expected_turn in script:
            state = state.take(action, deep=True)
            assert state.turn == expected_turn, f'State {state}'
Ejemplo n.º 2
0
def test_valid_actions():
    """Check the legal actions and bet sizes reported as betting progresses.

    Each assertion's failure message reports the value that was actually
    compared, so a failing run points at the right state/player.
    """
    state = State(list(range(6)), 2, None)

    # The first call's return value is ignored (presumably it updates
    # ``state`` itself); the deep call yields a separate state object.
    state.take('C')
    new_state = state.take('C', deep=True)

    # Checked separately so a failure shows the list that mismatched.
    assert state.valid_actions() == ['F', 'C', '2R'], (
        f'Actions {state.valid_actions()}')
    assert new_state.valid_actions() == ['F', 'C', '4R'], (
        f'Actions {new_state.valid_actions()}')

    state = State(list(range(6)), 2, None)

    state = state.take('2R', deep=True)

    assert state.players[0].bets == 3, f'Bet {state.players[0].bets}'
    assert state.valid_actions() == ['F', 'C', '2R'], (
        f'Actions {state.valid_actions()}')

    state = state.take('2R', deep=True)

    # Report the same player whose bet is being asserted.
    assert state.players[1].bets == 5, f'Bet {state.players[1].bets}'
    assert state.round == 0 and state.valid_actions() == ['F', 'C'], (
        f'Actions {state.valid_actions()}')

    state = state.take('C', deep=True)

    assert all(p.bets == 5 for p in state.players), f'Bet {state.players}'
    assert state.round == 1, f'Round {state.round}'
Ejemplo n.º 3
0
def test_terminal_after_raise():
    """A call followed by a raise is not terminal; the answering call is."""
    hand = State([1, 2, 3], 2, None)

    for action in ('C', '1R'):
        hand = hand.take(action, deep=True)
    assert hand.terminal is False, hand

    hand = hand.take('C', deep=True)
    assert hand.terminal is True, hand
Ejemplo n.º 4
0
def test_turn():
    """Check the turn after a call, and that acting after a fold raises."""
    after_call = State([1, 2, 3], 2, None)
    after_call.take('C')
    assert after_call.turn == 1

    after_fold = State([1, 2, 3], 2, None)
    after_fold.take('F')
    # A further action on the folded state must be rejected.
    with pytest.raises(ValueError):
        after_fold.take('C')
Ejemplo n.º 5
0
def test_turn():
    """Check ``turn`` after a single call and after fold, call, call."""
    game = State([1, 2, 3], 2, 1, None)
    game.take('C')
    assert game.turn == 1

    game = State([1, 2, 3], 2, 1, None)
    for action in ('F', 'C', 'C'):
        game.take(action)

    assert game.turn == 1
Ejemplo n.º 6
0
def learn(iterations, cards, num_cards, node_map, action_map):
    """Run ``iterations`` training iterations over randomly chosen deals.

    Args:
        iterations: number of iterations; the counter runs from 1 to
            ``iterations`` inclusive.
        cards: the deck; more than four cards selects the ``Leduc`` state
            class, otherwise the plain ``State`` class.
        num_cards: length of each permutation drawn from ``cards``.
        node_map: mapping with one entry per player (its length is used as
            the player count); each value is a mapping whose values expose
            ``regret_sum`` and ``strategy_sum`` dicts.
        action_map: passed through to ``update_strategy`` and
            ``accumulate_regrets``.
    """
    if len(cards) > 4:
        from leduc.state import Leduc as State
    else:
        from leduc.state import State
    all_combos = [list(t) for t in set(permutations(cards, num_cards))]
    num_players = len(node_map)
    for i in tqdm(range(1, iterations + 1), desc="learning"):
        # One deal is picked per iteration and reused for every player.
        card = np.random.choice(len(all_combos))
        for player in range(num_players):
            # NOTE(review): kuhn_eval is passed even when the Leduc state
            # class was selected above -- confirm this is intended.
            state = State(all_combos[card], num_players, kuhn_eval)
            if i % STRAT_INTERVAL == 0:
                update_strategy(player, state, node_map, action_map)

            if i > PRUNE_THRESH:
                # Past the threshold, traverse without pruning 5% of the
                # time and with pruning otherwise.
                chance = np.random.rand()
                if chance < .05:
                    accumulate_regrets(player, state, node_map, action_map)
                else:
                    accumulate_regrets(player, state, node_map, action_map,
                                       prune=True)
            else:
                accumulate_regrets(player, state, node_map, action_map)

        if i < LCFR_INTERVAL and i % DISCOUNT == 0:
            # Linear CFR discount d = (i/D) / (i/D + 1), always in (0, 1).
            # The previous expression (i/D) / (i/(D + 1)) reduced to the
            # constant (D + 1)/D > 1, which amplified instead of discounting.
            scaled = i / DISCOUNT
            discounted = scaled / (scaled + 1)
            for player_nodes in node_map.values():
                for node in player_nodes.values():
                    node.regret_sum = {key: value * discounted for
                                       key, value in node.regret_sum.items()}
                    node.strategy_sum = {key: value * discounted for
                                         key, value in node.strategy_sum.items()}
Ejemplo n.º 7
0
def test_terminal():
    """A fold, or four consecutive calls, must leave the state terminal.

    Also checks that an oversized raise on the finished state raises
    ``ValueError``.
    """
    folded = State(list(range(6)), 2, None).take('F', deep=True)
    assert folded.terminal == True, f'State {folded}'

    state = State(list(range(6)), 2, None)
    for _ in range(4):
        state = state.take('C', deep=True)
    assert state.terminal == True, f'State {state}'

    with pytest.raises(ValueError):
        state.take('1000R', deep=True)
Ejemplo n.º 8
0
    def __init__(self, node_map, action_map, cards, num_cards):
        """Store the blueprint maps and build a root state from a random deal.

        ``all_combos`` holds every distinct ``num_cards``-long permutation of
        ``cards`` as a list; one of them is picked at random for ``root``.
        The player count handed to ``State`` is ``len(node_map)``.
        """
        self.blueprint = node_map
        self.action_map = action_map

        distinct_deals = set(permutations(cards, num_cards))
        self.all_combos = [list(deal) for deal in distinct_deals]

        chosen = np.random.choice(len(self.all_combos))
        # NOTE(review): ``eval`` is whatever that name resolves to in this
        # module (the builtin unless it is rebound elsewhere) -- confirm.
        self.root = State(self.all_combos[chosen], len(node_map), eval)
Ejemplo n.º 9
0
def learn(iterations, node_map, action_map):
    """Accumulate regrets over ``iterations`` shuffles of a small deck.

    The deck has one more card than there are players (``len(node_map)``).
    After each shuffle, one regret traversal is run per player index, each
    from a fresh state with all-ones probabilities.
    """
    num_players = len(node_map)
    deck = [Card(14 - offset, 1) for offset in range(num_players + 1)]

    for _ in tqdm(range(iterations), desc="learning"):
        np.random.shuffle(deck)
        # The player index itself is not used by the traversal call.
        for _player in range(num_players):
            root = State(deck, num_players, 1, kuhn_eval)
            reach = np.ones(num_players)
            accumulate_regrets(root, node_map, action_map, reach)
Ejemplo n.º 10
0
def expected_utility(cards, num_cards, num_players, eval, node_map, action_map):
    """Average ``traverse_tree`` results over every distinct deal.

    Deals are the distinct ``num_cards``-long permutations of the sorted
    ``cards``. Returns the per-player sum of the traversal results divided
    by the number of deals.
    """
    ordered = sorted(cards)
    all_combos = [list(deal) for deal in set(permutations(ordered, num_cards))]

    totals = np.zeros(num_players)
    for deal in tqdm(all_combos, desc='calculating expected utility'):
        root = State(deal, num_players, 1, eval)
        totals += traverse_tree(root, node_map, action_map)

    return totals/len(all_combos)
Ejemplo n.º 11
0
def test_terminal():
    """Check ``terminal`` after folds with player counts of 2 and 3.

    With 2, a single fold ends the hand; with 3, the first fold does not
    and the second does.
    """
    heads_up = State([1, 2, 3], 2, None)
    assert heads_up.terminal is False, heads_up

    heads_up.take('F')
    assert heads_up.terminal is True, heads_up

    three_way = State([1, 2, 3], 3, None)
    assert three_way.terminal is False, three_way

    for expected in (False, True):
        three_way.take('F')
        assert three_way.terminal is expected, three_way
Ejemplo n.º 12
0
def learn(iterations, cards, num_cards, node_map, action_map):
    """Accumulate regrets over ``iterations`` randomly chosen deals.

    More than four cards selects the ``Leduc`` state class, otherwise the
    plain ``State`` class. For each iteration one deal is drawn and one
    traversal is run per player index, each with all-ones probabilities.
    """
    if len(cards) > 4:
        from leduc.state import Leduc as State
    else:
        from leduc.state import State

    distinct_deals = set(permutations(cards, num_cards))
    all_combos = [list(deal) for deal in distinct_deals]
    num_players = len(node_map)

    for _ in tqdm(range(iterations), desc="learning"):
        deal_index = np.random.choice(len(all_combos))
        # The player index itself is not used by the traversal call.
        for _player in range(num_players):
            root = State(all_combos[deal_index], num_players, kuhn_eval)
            reach = np.ones(num_players)
            accumulate_regrets(root, node_map, action_map, reach)
Ejemplo n.º 13
0
def build_tree(cards, num_players):
    """Build the root state and collect public states by traversing from it.

    More than four cards selects the Leduc state class and evaluator,
    otherwise the default state class and the Kuhn evaluator.

    Returns:
        A ``(public_states, root_state)`` tuple, where ``public_states`` is
        the dict handed to ``traverse_public``.
    """
    if len(cards) > 4:
        from leduc.state import Leduc as RootState
        from leduc.hand_eval import leduc_eval as hand_eval
    else:
        from leduc.state import State as RootState
        from leduc.hand_eval import kuhn_eval as hand_eval

    root = RootState(cards, num_players, hand_eval)
    public_states = {}
    traverse_public(root, public_states)
    return public_states, root
Ejemplo n.º 14
0
def test_pot():
    """Check the summed players value and individual bets after each action."""
    state = State(list(range(6)), 2, None)
    assert sum(state.players) == state.num_players, sum(state.players)

    # (action, expected sum over players, index of the player whose bet is 3)
    steps = (('2R', 4, 0), ('C', 6, 1))
    for action, expected_total, actor in steps:
        state = state.take(action, deep=True)
        assert sum(state.players) == expected_total, sum(state.players)
        assert state.players[actor].bets == 3, state.players[actor].bets
Ejemplo n.º 15
0
def expected_utility(cards, num_cards, num_players, node_map, action_map):
    """Average ``traverse_tree`` results over every distinct deal.

    More than four cards selects the Leduc state class and evaluator,
    otherwise the default state class and the Kuhn evaluator. Deals are the
    distinct ``num_cards``-long permutations of the sorted ``cards``.
    """
    if len(cards) > 4:
        from leduc.state import Leduc as RootState
        from leduc.hand_eval import leduc_eval as hand_eval
    else:
        from leduc.state import State as RootState
        from leduc.hand_eval import kuhn_eval as hand_eval

    ordered = sorted(cards)
    all_combos = [list(deal) for deal in set(permutations(ordered, num_cards))]

    totals = np.zeros(num_players)
    for deal in tqdm(all_combos, desc='calculating expected utility'):
        root = RootState(deal, num_players, hand_eval)
        totals += traverse_tree(root, node_map, action_map)

    return totals / len(all_combos)
Ejemplo n.º 16
0
def test_update_strategy():
    """``update_strategy`` must add strategy mass to both seeded nodes.

    Two nodes with preset regrets are registered for player 0; after one
    ``update_strategy`` call each node's ``strategy_sum`` must be positive.
    """
    num_players = 2
    node_map = {i: {} for i in range(num_players)}
    action_map = {i: {} for i in range(num_players)}
    n1 = Node(['F', 'C', '1R'])
    n1.regret_sum = {'F': 0, 'C': 1, '1R': 0}

    n2 = Node(['F', 'C', '1R'])
    n2.regret_sum = {'F': 1, 'C': 0, '1R': 1}

    node_map[0]['As || [[]]'] = n1
    node_map[0]["As || [['C', '1R']]"] = n2
    cards = [Card(14, 1), Card(13, 1), Card(12, 1)]
    state = State(cards, num_players, 1, kuhn_eval)

    update_strategy(0, state, node_map, action_map)

    def describe(node):
        # Built lazily: only evaluated when the matching assertion fails.
        return (f'Util\n{node}, \nNodes\n{node_map}, '
                f'Actions\n{json.dumps(action_map, indent=4)}')

    assert sum(n1.strategy_sum.values()) > 0, describe(n1)
    # Report n2 here; the message previously printed n1 for this check.
    assert sum(n2.strategy_sum.values()) > 0, describe(n2)
Ejemplo n.º 17
0
def test_terminal_multiround():
    """Check ``terminal`` after each action in one- and two-round games.

    The third ``State`` argument is 1 for the fold scenario and 2 for the
    scripted scenarios; each script pairs an action with the ``terminal``
    value expected right after it.
    """
    single = State([1, 2, 3], 2, 1, None)
    assert single.terminal is False, single

    single.take('F')
    assert single.terminal is True, single

    opening = (('C', False), ('C', False), ('C', False))
    scripts = (
        opening + (('C', True),),
        opening + (('1R', False),),
        opening + (('1R', False), ('1R', True)),
    )

    for script in scripts:
        game = State([1, 2, 3], 2, 2, None)
        for action, expect_terminal in script:
            game.take(action)
            assert game.terminal is expect_terminal, game
Ejemplo n.º 18
0
def test_valid_actions():
    """Check the offered actions at the start, after a call, and after a raise."""
    full_menu = ['F', 'C', '1R']

    opening = State([1, 2, 3], 2, None)
    offered = opening.valid_actions()
    assert offered == full_menu, offered

    opening.take('C')
    offered = opening.valid_actions()
    assert offered == full_menu, offered

    raised = State([1, 2, 3], 2, None)
    raised.take('1R')
    offered = raised.valid_actions()
    # After a raise, raising again is no longer offered.
    assert offered == ['F', 'C'], offered
Ejemplo n.º 19
0
def test_kuhn_utility():
    """Check ``utility()`` after call-call and after raise-call.

    Each case deals the same three cards to a fresh state, plays the listed
    actions, and compares the resulting utility array.
    """
    cases = (
        (('C', 'C'), [1, -1]),
        (('1R', 'C'), [2, -2]),
    )

    for actions, expected in cases:
        deck = [Card(14, 1), Card(13, 1), Card(12, 1)]
        game = State(deck, 2, kuhn_eval)

        for action in actions:
            game.take(action)

        utility = game.utility()
        assert np.array_equal(utility, np.array(expected)), utility