def test_goofspiel():
    """Smoke-test sparse SGD value learning on a 4-card zero-sum Goofspiel.

    A short OutcomeMCCFR run supplies the strategy, the linear value learner
    is then run against it, and the learned values are printed. Nothing is
    asserted; the test passes as long as the pipeline runs without raising.
    """
    game = Goofspiel(4, scoring=Goofspiel.Scoring.ZEROSUM)

    strategy = OutcomeMCCFR(game, seed=42)
    strategy.compute(100)

    initial_features = goofspiel_feaures_cards(game.initial_state())
    value_store = LinearValueStore(initial_features, fix_mean=2.5)
    sampler = InformationSetSampler(game, strategy)

    learner = SparseSGDLinearValueLearning(
        game, goofspiel_feaures_cards, value_store, sampler, seed=43)
    # The same strategy object is passed twice (presumably one entry per
    # player -- confirm against SparseSGDLinearValueLearning.compute).
    learner.compute([strategy, strategy], 100, 0.1, 0.01)

    print(value_store.values)
def main():
    """Run the Goofspiel value-learning experiment and plot the results.

    Steps, as executed:

    1. Build an ``N``-card zero-sum Goofspiel and an ``OutcomeMCCFR`` strategy,
       calling ``mc.persist`` with a doubling iteration count and printing the
       exploitability for both players whenever ``persist`` returns a falsy
       value.
    2. For every ``(val_samples, grad_samples)`` pair in a 2x2 grid, train a
       fresh ``SparseSGDLinearValueLearning`` instance over four step sizes
       and plot the recorded values in its own subplot.
    3. Show the figure and return.

    NOTE(review): everything after the bare ``return`` is unreachable. It
    looks like a parked follow-up experiment (re-solving the game with the
    learned values as rewards) -- confirm whether it should be re-enabled,
    moved into its own function, or removed.
    """
    N = 4
    ITERS = 2000000
    g = Goofspiel(N, scoring=Goofspiel.Scoring.ZEROSUM)
    mc = OutcomeMCCFR(g, seed=56)
    fname = "goof-{}".format(N)

    # Iteration targets double from 1024 while below ITERS, so the last
    # target actually used is 1048576 (2**20).
    its = 1024
    while its < ITERS:
        # presumably `persist` returns True when the result was loaded from a
        # cache file rather than computed -- TODO confirm; exploitability is
        # only printed when it returns a falsy value.
        cached = mc.persist(fname, iterations=its)
        if not cached:
            print("Exploitability after {:7d} turns (mc, g): {}, {}".format(
                its, exploitability(g, 0, mc), exploitability(g, 1, mc)))
        its *= 2

    infosampler = InformationSetSampler(g, mc)

    # Grid of settings: rows/columns of the subplot grid are sized by these
    # tuples, and itertools.product enumerates every (vst, gst) combination.
    vsts = (1, 3)
    gsts = (1, 3)
    # First subplot; the subplots created in the loop share its x and y axes.
    ax0 = plt.subplot(len(vsts), len(gsts), 1)
    for i, (vst, gst) in enumerate(itertools.product(vsts, gsts)):
        # Fresh value store and learner for every grid cell (fixed seed 44).
        vs = LinearValueStore(
            goofspiel_feaures_cards(g.initial_state()), fix_mean=(N + 1) / 2.0)
        vl = SparseSGDLinearValueLearning(
            g, goofspiel_feaures_cards, vs, infosampler, seed=44)
        # The same learner `vl` is run four times with halving step sizes;
        # the results of the four runs are stacked along axis 0.
        vals = np.concatenate([
            vl.compute(
                [mc, mc],
                1000,
                step=s,
                record_every=1,
                val_samples=vst,
                grad_samples=gst) for s in [2**-8, 2**-9, 2**-10, 2**-11]
        ],
                              axis=0)
        #c = ['red', 'green', 'blue', 'black'][i]
        # Subplot indices are 1-based, hence i + 1.
        ax = plt.subplot(len(vsts), len(gsts), i + 1, sharex=ax0, sharey=ax0)
        ax.plot(vals)
        # Legend entries are labelled 1..N.
        ax.legend(list(range(1, N + 1)))
        ax.set_title("valseps={} gradsteps={}".format(vst, gst))
        print("Done sampling valseps={} gradsteps={}".format(vst, gst))
        print("Values:", vs.values)
    plt.show()
    return

    # NOTE(review): unreachable because of the `return` above. Uses `vs` from
    # the final loop iteration as the reward vector of a second game.
    g2 = Goofspiel(N, scoring=Goofspiel.Scoring.ZEROSUM, rewards=vs.values)
    mc2 = OutcomeMCCFR(g2, seed=57)
    mc2.compute(iterations=ITERS)
    print("Exp(mc2, g2)", exploitability(g2, 0, mc2), exploitability(g2, 1, mc2))
    print("Exp(mc2, g)", exploitability(g, 0, mc2), exploitability(g, 1, mc2))
def test_goofspeil():
    """Play a fixed 7-card Goofspiel game and check the state along the way.

    Covers the initial state, the player-to-move sequence over the first
    fourteen moves, a mid-game snapshot (round, actions, winners, scores and
    hands), and the terminal scores and values.
    """
    game = Goofspiel(7)
    state = game.initial_state()

    # --- initial state ---------------------------------------------------
    assert state.player() == state.P_CHANCE
    assert state.score(0) == 0
    assert state.score(1) == 0
    assert state.actions() == list(range(7))
    opening_probs = state.chance_distribution().probabilities()
    assert (opening_probs == (pytest.approx(1 / 7), ) * 7).all()

    # --- first fourteen moves --------------------------------------------
    # After the k-th move the player to act must be k % 3 - 1, i.e. the
    # sequence 0, 1, -1, 0, 1, -1, ...
    opening_moves = [3, 1, 0, 4, 3, 5, 5, 2, 2, 1, 4, 3, 2, 6]
    for turn, action in enumerate(opening_moves, start=1):
        state = state.play(action)
        assert state.player() == turn % 3 - 1

    # --- mid-game snapshot -----------------------------------------------
    assert state.round() == 4
    assert state.player() == 1
    assert state.actions() == [1, 4, 6]
    assert state.winners() == [0, 1, -1, 0]
    midgame_probs = state.chance_distribution().probabilities()
    assert (midgame_probs == (pytest.approx(1.0 / 3), ) * 3).all()
    assert state.score(0) == 6
    assert state.score(1) == 5
    assert state.cards_in_hand(-1) == [0, 6]
    assert state.cards_in_hand(0) == [0, 5]
    assert state.cards_in_hand(1) == [1, 4, 6]

    # --- play out the rest of the game -----------------------------------
    closing_moves = [1, 6, 5, 6, 0, 0, 0]
    for action in closing_moves:
        state = state.play(action)

    assert state.is_terminal()
    assert state.score(0) == 9
    assert state.score(1) == 12
    assert state.values() == (-1, 1)