def test_best_response_cfr_one_card_poker():
    """Run 10 CFR iterations on 4-card OneCardPoker and check exploitability.

    CFR is run without chance sampling; the exploitability of the returned
    strategy is printed and asserted to be strictly positive.
    """
    poker_game = OneCardPoker.create_game(n_cards=4)

    # cfr returns three values; only the first (the strategy) is used here.
    strategy, _, _ = cfr(
        poker_game,
        num_iters=10,
        use_chance_sampling=False,
    )

    exploitability = compute_exploitability(poker_game, strategy)
    report = "Exploitability: {}"
    print(report.format(exploitability))

    assert exploitability > 0.0
def test_best_response_cfr():
    """Run 10 CFR iterations on Leduc and check the strategy's exploitability.

    The deck has 3 card values in each of 2 suits. CFR is run without chance
    sampling; the exploitability of the returned strategy is printed and
    asserted to be strictly positive.
    """
    # Build the deck value-major: every suit of value 0, then value 1, ...
    deck = []
    for value in range(3):
        for suit in range(2):
            deck.append(Card(value, suit))

    leduc_game = Leduc(deck)

    # cfr returns three values; only the first (the strategy) is used here.
    strategy, _, _ = cfr(
        leduc_game,
        num_iters=10,
        use_chance_sampling=False,
    )

    exploitability = compute_exploitability(leduc_game, strategy)
    report = "Exploitability: {}"
    print(report.format(exploitability))

    assert exploitability > 0.0
default=1e-4, help='The number of epochs to train the neural network for.') parser.add_argument('--dropout_rate', default=None, help='The dropout rate to use.') args = parser.parse_args() dropout_rate = None if args.dropout_rate: dropout_rate = float(args.dropout_rate) cards = get_deck(num_values=args.num_values, num_suits=args.num_suits) game = Leduc(cards) strategy, exploitabilities, strategies = cfr( game, num_iters=args.cfr_iters, use_chance_sampling=args.use_chance_sampling) exploitability = compute_exploitability(game, strategy) print("Exploitability of final strategy: {}".format(exploitability)) leduc_nfsp = LeducNFSP(cards) state_vectors = leduc_nfsp._state_vectors state_dim = leduc_nfsp.state_dim action_dim = leduc_nfsp.action_dim # Now build a network. layer_dims = [64, 64, 64] network = build_network(state_dim, action_dim, layer_dims,