예제 #1
0
def test_goofspiel():
    """Smoke-test sparse SGD linear value learning on 4-card Goofspiel.

    Builds a zero-sum game, runs ``OutcomeMCCFR`` for 100 iterations, then
    runs ``SparseSGDLinearValueLearning`` against that strategy and prints
    the learned values. Nothing is asserted: the test only verifies that the
    whole pipeline executes without raising.
    """
    game = Goofspiel(4, scoring=Goofspiel.Scoring.ZEROSUM)

    strategy = OutcomeMCCFR(game, seed=42)
    strategy.compute(100)

    # The value store is initialised from the features of the initial state.
    initial_features = goofspiel_feaures_cards(game.initial_state())
    value_store = LinearValueStore(initial_features, fix_mean=2.5)

    sampler = InformationSetSampler(game, strategy)
    learner = SparseSGDLinearValueLearning(
        game, goofspiel_feaures_cards, value_store, sampler, seed=43)

    # The same strategy object is passed twice (presumably one entry per
    # player -- confirm against SparseSGDLinearValueLearning.compute).
    learner.compute([strategy, strategy], 100, 0.1, 0.01)
    print(value_store.values)
예제 #2
0
def main(*, evaluate_learned_rewards=False):
    """Train MCCFR on 4-card zero-sum Goofspiel and plot learned values.

    The run has up to three phases:

    1. ``OutcomeMCCFR.persist`` is called under the name ``"goof-4"`` with
       iteration counts 1024, 2048, ... (doubling while below 2,000,000).
       Whenever ``persist`` returns a falsy value, the exploitability of the
       strategy for both players is printed.
    2. For every ``(val_samples, grad_samples)`` pair in ``{1, 3} x {1, 3}``
       a fresh ``LinearValueStore`` is trained with
       ``SparseSGDLinearValueLearning`` over four decreasing step sizes. The
       arrays returned by ``compute`` are concatenated along axis 0 and drawn
       in that pair's own subplot. The figure is then shown.
    3. Optional: a second game is built whose ``rewards`` are the values
       learned in the *last* configuration of phase 2, MCCFR is run on it,
       and its exploitability in both games is printed.

    Args:
        evaluate_learned_rewards: When true, run phase 3 after the plot
            window is closed. Defaults to ``False``, which matches the
            previous behaviour where this phase sat behind an unconditional
            ``return`` and could never execute.
    """
    num_cards = 4
    max_iterations = 2000000
    game = Goofspiel(num_cards, scoring=Goofspiel.Scoring.ZEROSUM)
    mc = OutcomeMCCFR(game, seed=56)
    fname = "goof-{}".format(num_cards)

    # Phase 1: persist the strategy at doubling iteration counts.
    its = 1024
    while its < max_iterations:
        cached = mc.persist(fname, iterations=its)
        if not cached:
            print("Exploitability after {:7d} turns (mc, g): {}, {}".format(
                its, exploitability(game, 0, mc), exploitability(game, 1, mc)))
        its *= 2
    infosampler = InformationSetSampler(game, mc)

    # Phase 2: one subplot per (val_samples, grad_samples) combination.
    vsts = (1, 3)
    gsts = (1, 3)
    step_sizes = [2**-8, 2**-9, 2**-10, 2**-11]
    # NOTE(review): the first loop iteration requests subplot position 1
    # again with sharex/sharey=ax0; how pyplot treats an already existing
    # Axes at that position depends on the matplotlib version -- confirm.
    ax0 = plt.subplot(len(vsts), len(gsts), 1)
    for i, (vst, gst) in enumerate(itertools.product(vsts, gsts)):
        vs = LinearValueStore(goofspiel_feaures_cards(game.initial_state()),
                              fix_mean=(num_cards + 1) / 2.0)
        vl = SparseSGDLinearValueLearning(game,
                                          goofspiel_feaures_cards,
                                          vs,
                                          infosampler,
                                          seed=44)
        # The same learner and value store are used for all step sizes, in
        # order from the largest step to the smallest.
        vals = np.concatenate(
            [
                vl.compute([mc, mc],
                           1000,
                           step=s,
                           record_every=1,
                           val_samples=vst,
                           grad_samples=gst) for s in step_sizes
            ],
            axis=0)
        ax = plt.subplot(len(vsts), len(gsts), i + 1, sharex=ax0, sharey=ax0)
        ax.plot(vals)
        # Legend entries are labelled 1..num_cards (presumably one plotted
        # line per card -- confirm against the shape returned by compute).
        ax.legend(list(range(1, num_cards + 1)))
        ax.set_title("valseps={} gradsteps={}".format(vst, gst))
        print("Done sampling valseps={} gradsteps={}".format(vst, gst))
        print("Values:", vs.values)
    plt.show()

    if not evaluate_learned_rewards:
        return

    # Phase 3: `vs` is the value store from the last loop iteration above.
    g2 = Goofspiel(num_cards,
                   scoring=Goofspiel.Scoring.ZEROSUM,
                   rewards=vs.values)
    mc2 = OutcomeMCCFR(g2, seed=57)
    mc2.compute(iterations=max_iterations)
    print("Exp(mc2, g2)", exploitability(g2, 0, mc2),
          exploitability(g2, 1, mc2))
    print("Exp(mc2, g)", exploitability(game, 0, mc2),
          exploitability(game, 1, mc2))
예제 #3
0
def test_goofspeil():
    """Play a scripted 7-card Goofspiel game and check the state throughout.

    The test checks the initial state, the player to move after each of the
    first 14 actions, a detailed snapshot in round 4 (actions, round winners,
    scores, remaining hands), and finally the terminal scores and values.
    """
    game = Goofspiel(7)
    state = game.initial_state()

    # Fresh game: chance moves first, nobody has scored, all 7 actions are
    # available and equally likely.
    assert state.player() == state.P_CHANCE
    assert state.score(0) == 0
    assert state.score(1) == 0
    assert state.actions() == list(range(7))
    initial_probs = state.chance_distribution().probabilities()
    assert (initial_probs == (pytest.approx(1 / 7), ) * 7).all()

    # After the k-th action the player to move must be k % 3 - 1, i.e. the
    # expected ids cycle 0, 1, -1, 0, 1, -1, ...
    opening_moves = [3, 1, 0, 4, 3, 5, 5, 2, 2, 1, 4, 3, 2, 6]
    for turn, action in enumerate(opening_moves, start=1):
        state = state.play(action)
        assert state.player() == turn % 3 - 1

    # Snapshot in round 4, with player 1 to move.
    assert state.round() == 4
    assert state.player() == 1
    assert state.actions() == [1, 4, 6]
    assert state.winners() == [0, 1, -1, 0]
    midgame_probs = state.chance_distribution().probabilities()
    assert (midgame_probs == (pytest.approx(1.0 / 3), ) * 3).all()
    assert state.score(0) == 6
    assert state.score(1) == 5

    # Cards still held by chance (-1), player 0 and player 1.
    assert state.cards_in_hand(-1) == [0, 6]
    assert state.cards_in_hand(0) == [0, 5]
    assert state.cards_in_hand(1) == [1, 4, 6]

    # Play out the remaining moves to the end of the game.
    closing_moves = [1, 6, 5, 6, 0, 0, 0]
    for action in closing_moves:
        state = state.play(action)

    assert state.is_terminal()
    assert state.score(0) == 9
    assert state.score(1) == 12

    assert state.values() == (-1, 1)