Example 1
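Trains OutcomeMCCFR on 4-card zero-sum Goofspiel, persisting the strategy and printing exploitability on a geometric schedule of iteration counts; the code after the early return sketches learning per-card values and re-solving the game with the learned rewards.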
def main():
    N = 4
    ITERS = 1000000  # iteration budget, also used by mc2.compute() below
    g = Goofspiel(N, scoring=Goofspiel.Scoring.ZEROSUM)
    mc = OutcomeMCCFR(g, seed=56)

    its = 100.0
    while its < ITERS:
        fname = "goof-{}-{}.strat".format(N, int(its))
        mc.persist(fname, iterations=int(its) - mc.iterations)
        print("Exploitability after {:7d} turns (mc, g): {}, {}".format(
            int(its), exploitability(g, 0, mc), exploitability(g, 1, mc)))
        its *= 2 ** 0.5
    return  # stop here; the value-learning experiment below is disabled
    vs = GoofSpielCardsValueStore(g)
    vl = SparseStochasticValueLearning(g, vs, seed=41)
    vals = np.concatenate([
        vl.compute([mc, mc], 1000, alpha=0.01, store_step=1),
        vl.compute([mc, mc], 1000, alpha=0.001, store_step=1),
        vl.compute([mc, mc], 1000, alpha=0.0001, store_step=1),
    ], axis=0)
    plt.plot(vals)
    plt.show()

    print("Values:", vs.values)

    g2 = Goofspiel(N, scoring=Goofspiel.Scoring.ZEROSUM, rewards=vs.values)
    mc2 = OutcomeMCCFR(g2, seed=57)
    mc2.compute(iterations=ITERS)
    print("Exp(mc2, g2)", exploitability(g2, 0, mc2), exploitability(g2, 1, mc2))
    print("Exp(mc2, g)", exploitability(g, 0, mc2), exploitability(g, 1, mc2))
Example 2
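Plays a fixed sequence of actions in 7-card zero-sum Goofspiel, checking the chance distribution, legal actions, observations, internal state and final payoff along the way.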
def test_goofspiel():
    g = Goofspiel(7, Goofspiel.Scoring.ZEROSUM)
    s = g.start()

    assert s.is_chance()
    assert s.actions == tuple(range(1, 8))
    assert (s.chance == (pytest.approx(1 / 7), ) * 7)

    for i, a in enumerate([4, 2, 1, 5, 4, 6, 6, 3, 3, 2, 5, 4, 3, 7]):
        s = s.play(a)

    assert s.player == 1
    assert s.actions == (2, 5, 7)
    assert s.observations[2] == (4, 1, 5, -1, 6, 0, 2, 1, 3)
    assert s.state[1] == pytest.approx([6, 5])
    assert s.state[0][0] == (1, 6)
    assert s.state[0][1] == (2, 5, 7)
    assert s.state[0][2] == (1, 7)

    for a in [2, 7, 6, 7, 1, 1, 5]:
        s = s.play(a)

    assert s.is_terminal()
    assert s.state[1] == pytest.approx([9, 13])
    assert s.payoff == pytest.approx([-4.0, 4.0])
Example 3
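Runs a short MCCFR computation, then fits per-card values with a LinearValueStore and SparseSGDLinearValueLearning sampled from the resulting strategies.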
def test_goofspiel():
    g = Goofspiel(4, scoring=Goofspiel.Scoring.ZEROSUM)
    mc = OutcomeMCCFR(g, seed=42)
    mc.compute(100)
    vs = LinearValueStore(goofspiel_feaures_cards(g.initial_state()), fix_mean=2.5)
    infosampler = InformationSetSampler(g, mc)
    val = SparseSGDLinearValueLearning(g, goofspiel_feaures_cards, vs, infosampler, seed=43)
    val.compute([mc, mc], 100, 0.1, 0.01)
    print(vs.values)
Example 4
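Checks that custom card rewards show up correctly in the final payoffs under both ZEROSUM and ABSOLUTE scoring.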
def test_goofspiel_rewards():
    us = [UniformStrategy(), UniformStrategy()]
    g = Goofspiel(2, Goofspiel.Scoring.ZEROSUM, rewards=[100, 11])
    for i in range(50):
        s = play_strategies(g, us, seed=i)
        assert tuple(s.payoff) in ((0.0, 0.0), (-89.0, 89.0), (89.0, -89.0))

    g = Goofspiel(2, Goofspiel.Scoring.ABSOLUTE, rewards=[100, 11])
    for i in range(50):
        s = play_strategies(g, us, seed=i)
        assert tuple(s.payoff) in ((0.0, 0.0), (100.0, 11.0), (11.0, 100.0))
Example 5
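A larger experiment: MCCFR with cached checkpoints via persist(), followed by a grid of SGD value-learning runs over val_samples and grad_samples settings, plotted in shared-axis subplots.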
def main():
    N = 4
    ITERS = 2000000
    g = Goofspiel(N, scoring=Goofspiel.Scoring.ZEROSUM)
    mc = OutcomeMCCFR(g, seed=56)
    fname = "goof-{}".format(N)

    its = 1024
    while its < ITERS:
        cached = mc.persist(fname, iterations=its)
        if not cached:
            print("Exploitability after {:7d} turns (mc, g): {}, {}".format(
                its, exploitability(g, 0, mc), exploitability(g, 1, mc)))
        its *= 2
    infosampler = InformationSetSampler(g, mc)

    vsts = (1, 3)
    gsts = (1, 3)
    ax0 = plt.subplot(len(vsts), len(gsts), 1)
    for i, (vst, gst) in enumerate(itertools.product(vsts, gsts)):
        vs = LinearValueStore(goofspiel_feaures_cards(g.initial_state()),
                              fix_mean=(N + 1) / 2.0)
        vl = SparseSGDLinearValueLearning(g,
                                          goofspiel_feaures_cards,
                                          vs,
                                          infosampler,
                                          seed=44)
        vals = np.concatenate([
            vl.compute([mc, mc], 1000, step=s, record_every=1,
                       val_samples=vst, grad_samples=gst)
            for s in [2**-8, 2**-9, 2**-10, 2**-11]
        ], axis=0)
        ax = plt.subplot(len(vsts), len(gsts), i + 1, sharex=ax0, sharey=ax0)
        ax.plot(vals)
        ax.legend(list(range(1, N + 1)))
        ax.set_title("val_samples={} grad_samples={}".format(vst, gst))
        print("Done sampling val_samples={} grad_samples={}".format(vst, gst))
        print("Values:", vs.values)
    plt.show()

    return  # stop here; the reward-relearning experiment below is disabled

    g2 = Goofspiel(N, scoring=Goofspiel.Scoring.ZEROSUM, rewards=vs.values)
    mc2 = OutcomeMCCFR(g2, seed=57)
    mc2.compute(iterations=ITERS)
    print("Exp(mc2, g2)", exploitability(g2, 0, mc2),
          exploitability(g2, 1, mc2))
    print("Exp(mc2, g)", exploitability(g, 0, mc2), exploitability(g, 1, mc2))
Example 6
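Checks MCCFR convergence on 3-card Goofspiel: the learned strategy at one information set, sampled payoffs against itself and against a uniform player, and exploitability below 0.1 for both players.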
def test_mccfr_goofspiel3():
    g = Goofspiel(3, scoring=Goofspiel.Scoring.ZEROSUM)
    mc = OutcomeMCCFR(g, seed=51)
    mc.compute(600, burn=0.5)
    mcs = mc.strategies
    us = UniformStrategy()
    s1 = g.play_sequence([2])
    assert mcs[0].strategy(s1) == pytest.approx([0., 0.9, 0.], abs=0.1)
    assert sample_payoff(g, mcs, 300, seed=12)[0] == pytest.approx([0.0, 0.0],
                                                                   abs=0.1)
    assert sample_payoff(g, (mcs[0], us), 300,
                         seed=13)[0] == pytest.approx([1.2, -1.2], abs=0.2)
    assert exploitability(g, 0, mcs[0]) < 0.1
    assert exploitability(g, 1, mcs[1]) < 0.1
Example 7
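Writes games to Gambit EFG format with and without names and sanity-checks the output length.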
def test_dump_gambit_game():
    g = Goofspiel(3, scoring=Goofspiel.Scoring.ZEROSUM)

    s = io.StringIO()
    write_efg(g, s, names=False)
    assert (len(s.getvalue()) > 1024)

    s = io.StringIO()
    write_efg(g, s, names=True)
    assert (len(s.getvalue()) > 1024)

    g2 = Goofspiel(2, scoring=Goofspiel.Scoring.WINLOSS)
    s = io.StringIO()
    write_efg(g2, s, names=True)
    assert len(s.getvalue().splitlines()) == 40
Example 8
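A smoke test wiring an MCCFR strategy pair into SparseStochasticValueLearning.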
def test_unit():
    g = Goofspiel(4, scoring=Goofspiel.Scoring.ZEROSUM)
    mc = OutcomeMCCFR(g, seed=42)
    mc.compute(500)
    vs = GoofSpielCardsValueStore(g)
    val = SparseStochasticValueLearning(g, vs, seed=43)
    val.compute([mc, mc], 200, 0.01)
Example 9
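A disabled server test (the xtest_ prefix keeps pytest from collecting it) that would pit a human player against a UniformStrategy.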
def xtest_server():

    from gamegym.games import Goofspiel
    from gamegym.strategy import UniformStrategy

    g = Goofspiel(5)

    s = Server()
    s.play_game(g, [None, UniformStrategy()])
Example 10
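Checks that the sampled value of ApproxBestResponse against two uniform players matches the known best-response values for 3 and 4 cards.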
def test_approx_best_response_goofspiel():
    for n_cards, its, br_value in [(3, 1000, 1.333), (4, 20000, 2.5)]:
        game = Goofspiel(n_cards, Goofspiel.Scoring.ZEROSUM)
        strategy = ApproxBestResponse(game,
                                      0, [UniformStrategy()] * 2,
                                      iterations=its,
                                      seed=35)
        assert strategy.sample_value(its // 2) == pytest.approx(br_value,
                                                                rel=0.2)
Example 11
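Computes an exact BestResponse against a uniform opponent and checks the known game values, plus that the responder always bids the card equal to the current prize while that card is still in hand.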
def test_best_response_goofspiel():

    for n_cards, br_value in [(3, pytest.approx(4/3)), (4, pytest.approx(2.5))]:
        game = Goofspiel(n_cards, Goofspiel.Scoring.ZEROSUM)
        strategy = BestResponse(game, 0, {1: UniformStrategy()})
        for k, v in strategy.best_responses.items():
            reward = k[1][-1]
            assert reward not in v.values() or v.probability(reward) == 1.0
        assert strategy.value == br_value
Example 12
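A disabled exploratory run (non_test_ prefix) that prints an empirical exploitability estimate for MCCFR snapshots and then tries several value-learning step sizes.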
def non_test_goofspiel():
    g = Goofspiel(4, scoring=Goofspiel.Scoring.ZEROSUM)
    mc = OutcomeMCCFR(g, seed=42)
    for s in [10, 100, 1000]:
        mc.compute(s)
        br = BestResponse(g, 0, [None, mc])
        print(
            "Exploit after", s,
            np.mean([
                g.play_strategies([br, mc], seed=i)[-1].values()[0]
                for i in range(1000)
            ]))

    vs = GoofSpielCardsValueStore(g)
    val = SparseStochasticValueLearning(g, vs, seed=43)
    for alpha in [0.1, 0.01, 0.01, 0.001, 0.0001]:
        print(alpha)
        val.compute([mc, mc], 200, alpha)
Example 13
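A plotting driver that traces MCCFR strategies and exploitability for Matching Pennies, Rock-Paper-Scissors, 4- and 5-card Goofspiel, and Dice Poker.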
def main():

    g = MatchingPennies()
    base = np.array([[1.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0]])
    plot_to_files(g, "plot_mccfr_trace_pennies", 3, 1500, 150, base=base, exploit_every=1)

    g = RockPaperScissors()
    plot_to_files(g, "plot_mccfr_trace_rps", 3, 1500, 150, burn=0.3, exploit_every=1)

    g = Goofspiel(4, scoring=Goofspiel.Scoring.ZEROSUM)
    plot_to_files(
        g,
        "plot_mccfr_trace_goof4",
        6,
        1000000,
        1000,
        depth=6,
        burn=0.3,
        burn_from=3,
        exploit_every=1)

    g = Goofspiel(5, scoring=Goofspiel.Scoring.ZEROSUM)
    plot_to_files(
        g,
        "plot_mccfr_trace_goof5",
        6,
        1000000,
        1000,
        depth=6,
        burn=0.3,
        burn_from=3,
        exploit_every=10)

    g = DicePoker(6)
    plot_to_files(
        g,
        "plot_mccfr_trace_dicepoker",
        6,
        500000,
        500,
        depth=6,
        burn=0.3,
        burn_from=3,
        exploit_every=1)
Example 14
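Compares exact and sampled exploitability of an MCCFR strategy on 4-card Goofspiel for both players.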
def test_mccfr_goofspiel4():
    g = Goofspiel(4, scoring=Goofspiel.Scoring.ZEROSUM)
    mc = OutcomeMCCFR(g, seed=49)
    mc.compute(10000, burn=0.5)
    mcs = mc.strategies
    for p in [0, 1]:
        exp = exploitability(g, p, mcs[p])
        aexp = approx_exploitability(g, p, mcs[p], 10000, seed=31 + p)
        print(p, exp, aexp)
        assert exp == pytest.approx(0.7, abs=0.2)
        assert aexp == pytest.approx(0.7, abs=0.2)
Example 15
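The same fixed-sequence walkthrough as Example 2, written against an older state API (player(), score(), chance_distribution(), cards_in_hand()).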
def test_goofspeil():
    g = Goofspiel(7)
    s = g.initial_state()

    assert s.player() == s.P_CHANCE
    assert s.score(0) == 0
    assert s.score(1) == 0
    assert s.actions() == list(range(7))
    assert (
        s.chance_distribution().probabilities() == (pytest.approx(1 / 7), ) *
        7).all()

    for i, a in enumerate([3, 1, 0, 4, 3, 5, 5, 2, 2, 1, 4, 3, 2, 6]):
        s = s.play(a)
        assert s.player() == (i + 1) % 3 - 1

    assert s.round() == 4
    assert s.player() == 1
    assert s.actions() == [1, 4, 6]
    assert s.winners() == [0, 1, -1, 0]
    assert (
        s.chance_distribution().probabilities() == (pytest.approx(1.0 / 3), ) *
        3).all()
    assert s.score(0) == 6
    assert s.score(1) == 5

    assert s.cards_in_hand(-1) == [0, 6]
    assert s.cards_in_hand(0) == [0, 5]
    assert s.cards_in_hand(1) == [1, 4, 6]

    for a in [1, 6, 5, 6, 0, 0, 0]:
        s = s.play(a)

    assert s.is_terminal()
    assert s.score(0) == 9
    assert s.score(1) == 12

    assert s.values() == (-1, 1)
Example 16
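Like Example 11, but against an older best-response representation where strategies are indexed by the position of the card among those not yet played.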
def test_best_response_goofspiel():
    for n_cards, br_value in [(3, pytest.approx(4 / 3)),
                              (4, pytest.approx(2.5))]:
        game = Goofspiel(n_cards, Goofspiel.Scoring.ZEROSUM)
        strategy = BestResponse(game, 0, [UniformStrategy()] * 2)
        for k, v in strategy.best_responses.items():
            reward = k[-1]
            played_cards = k[0::3]
            idx = len([
                i for i in range(n_cards)
                if i < reward and i not in played_cards
            ])
            assert reward in played_cards or v[idx] == 1.0
        assert strategy.value == br_value
Example 17
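Estimates payoff values with LPZeroSumValueLearning, first recovering the Rock-Paper-Scissors payoff matrix under increasingly strict conditions, then sketching card-value estimation for Goofspiel(4).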
def main():
    print("#### Rock-paper-scissors value estimation")
    g = RockPaperScissors()
    us = UniformStrategy()
    infosampler = InformationSetSampler(g, us)
    val = LPZeroSumValueLearning(g, infosampler, matrix_zerosum_features, us)

    # Regularize: set one payoff to 1.0
    val.add_condition({(0, 1): 1.0}, 1.0)
    print("# With only non-triviality (one payoff set to 1.0)")
    print(val.compute())
    print("Flex value sum", val.flex_sum)
    # Zero diagonal
    for i in range(3):
        val.add_condition({(i, i): 1.0}, 0.0)
    print("# With zero diagonal")
    print(val.compute())
    print("Flex value sum", val.flex_sum)

    # Symmetrical payoffs
    for i in range(3):
        for j in range(i):
            val.add_condition({(i, j): -1.0, (j, i): -1.0}, 0.0)
    print("# Adding val(i,j) = -val(j,i)")
    print(val.compute())
    print("Flex value sum", val.flex_sum)

    #return ### Goofspiel(3) is boring, Goofspiel(4) hits OOM
    print("#### Goofspiel(4) card value estimation")
    g = Goofspiel(4)
    mc = OutcomeMCCFR(g, seed=42)
    mc.compute(2000)
    ef = InfoSetExpectedFeatures(g, goofspiel_feaures_cards, mc)
    for i, f in ef.info_features.items():
        print("INFOSET {}:\n{}".format(i, f))
        print(ef.info_next[i])

    return  # stop here; the LP card-value estimation below is disabled
    infosampler = InformationSetSampler(g, mc)  # rebuilt for Goofspiel; the sampler above was for RPS
    val = LPZeroSumValueLearning(g, infosampler, goofspiel_feaures_cards, mc)

    # Regularize: set one payoff to 1.0
    val.add_condition({(0, ): 1.0, (1, ): 1.0, (2, ): 1.0, (3, ): 1.0}, 10.0)
    print("# Regularizing card values mean to 2.5 (mean of 1..4)")
    print(len(val.conds_eq), len(val.conds_le), len(val.flex_variables))
    print(
        val.compute(
            options=dict(tol=1e-6, disp=True, sparse=True, lstsq=True)))
    print("Flex value sum", val.flex_sum)
Example 18
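Parses a Gambit-computed Nash equilibrium strategy string for Goofspiel(3) and verifies that both players are essentially unexploitable.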
def test_parse_gambit_strategy_g3():
    g = Goofspiel(3, scoring=Goofspiel.Scoring.ZEROSUM)
    txt = "NE,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,0,1,1,1,0,0,1,1,1,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,0,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,1,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,1,1,1,1,1"
    strats = parse_strategy(g, txt)
    assert exploitability(g, 0, strats[0]) < 1e-6
    assert exploitability(g, 1, strats[1]) < 1e-6
Example 19
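Checks that BestResponse raises LimitExceeded when its node-traversal budget is exceeded.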
def test_best_response_limit():
    game = Goofspiel(3)
    BestResponse(game, 0, [UniformStrategy()] * 2)
    with pytest.raises(LimitExceeded, match="traversed more than"):
        BestResponse(game, 0, [UniformStrategy()] * 2, max_nodes=1024)
Example 20
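An older-API variant of the reward test in Example 4.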
def test_goofspeil_rewards():
    g = Goofspiel(2, Goofspiel.Scoring.ZEROSUM, rewards=[100, 11])
    for _ in range(10):
        history = g.play_strategies([UniformStrategy(), UniformStrategy()])
        t = history[-1]
        assert t.values() in ([0, 0], [-89, 89], [89, -89])