import numpy as np
import pytest

# Classes and helpers under test. The exact module paths are assumed here
# and may need adjusting to the actual package layout.
from gamegym.algorithms import (BestResponse, OutcomeMCCFR,
                                SparseStochasticValueLearning)
from gamegym.games import (DicePoker, Goofspiel, GoofSpielCardsValueStore,
                           OneCardPoker, RockPaperScissors)
from gamegym.strategy import ConstStrategy, UniformStrategy
from gamegym.utils import LimitExceeded, play_strategies, sample_payoff

def test_best_response_rps():
    # Opponent always plays Rock; the best response is pure Paper, value 1.
    bart_simpson_strategy = ConstStrategy((1, 0, 0))
    game = RockPaperScissors()
    for p in [0, 1]:
        strategy = BestResponse(game, p, [bart_simpson_strategy] * 2)
        assert tuple(strategy.best_responses.values())[0] == pytest.approx(
            (0.0, 1.0, 0.0))
        assert strategy.value == pytest.approx(1.0)

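# Added sanity check (not in the original suite): against a uniform RPS
# opponent every action yields expected payoff 0, so the best-response
# value should also be 0. Uses only the API exercised above.
def test_best_response_rps_uniform():
    game = RockPaperScissors()
    for p in [0, 1]:
        strategy = BestResponse(game, p, [UniformStrategy()] * 2)
        assert strategy.value == pytest.approx(0.0)
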
def test_mccfr_dicepoker():
    g = DicePoker()
    mc = OutcomeMCCFR(g, seed=52)
    mc.compute(10000, burn=0.5)
    br0 = BestResponse(g, 0, mc.strategies)
    assert br0.value < 0.3
    payoff0 = sample_payoff(g, [br0, mc.strategies[1]], 10000, seed=3)[0]
    assert br0.value == pytest.approx(payoff0[0], abs=0.05)
    br1 = BestResponse(g, 1, mc.strategies)
    assert br1.value > -0.2
    payoff1 = sample_payoff(g, [mc.strategies[0], br1], 10000, seed=4)[0]
    assert br1.value == pytest.approx(payoff1[1], abs=0.05)
    print(br0.value, br1.value, payoff0, payoff1)
    assert payoff0[0] > payoff1[0]
    assert payoff0[0] < 0.3
    assert payoff1[0] > 0.1

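# Hedged sketch (not in the original suite): assuming DicePoker is zero-sum,
# as the bounds above suggest, the sum of the two best-response values is
# the exploitability of the strategy pair; it is non-negative for exact
# best responses and shrinks as MCCFR converges.
def test_mccfr_dicepoker_exploitability():
    g = DicePoker()
    mc = OutcomeMCCFR(g, seed=52)
    mc.compute(10000, burn=0.5)
    exploitability = (BestResponse(g, 0, mc.strategies).value +
                      BestResponse(g, 1, mc.strategies).value)
    # Non-negative up to floating-point error.
    assert exploitability >= -1e-9
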
def test_onecardpoker_mc():
    g = OneCardPoker()
    mc = OutcomeMCCFR(g, seed=56)
    mc.compute(1000)
    # Even against an exact best response, the learned strategy for player 0
    # should not lose by much on average.
    br = BestResponse(g, 1, mc.strategies)
    assert np.mean([
        play_strategies(g, [mc.strategies[0], br], seed=i).payoff[0]
        for i in range(1000)
    ]) > -0.4

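# Hedged variant (not in the original suite): if OneCardPoker is zero-sum,
# player 0's expected payoff against player 1's best response equals
# -br.value exactly, so the sampled bound above can be asserted directly.
def test_onecardpoker_mc_direct():
    g = OneCardPoker()
    mc = OutcomeMCCFR(g, seed=56)
    mc.compute(1000)
    br = BestResponse(g, 1, mc.strategies)
    assert br.value < 0.4
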
def test_best_response_goofspiel():
    for n_cards, br_value in [(3, pytest.approx(4 / 3)), (4, pytest.approx(2.5))]:
        game = Goofspiel(n_cards, Goofspiel.Scoring.ZEROSUM)
        strategy = BestResponse(game, 0, [UniformStrategy()] * 2)
        for k, v in strategy.best_responses.items():
            # The info-set key ends with the currently auctioned reward;
            # every third entry (k[0::3]) is a card this player already bid.
            reward = k[-1]
            played_cards = k[0::3]
            # Position of the reward card among the cards still in hand.
            idx = len([
                i for i in range(n_cards) if i < reward and i not in played_cards
            ])
            # Against a uniform opponent, the best response bids the card
            # equal to the current reward whenever it is still available.
            assert reward in played_cards or v[idx] == 1.0
        assert strategy.value == br_value

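# Hedged cross-check (not in the original suite): sampled play between the
# best response and the uniform opponent should reproduce strategy.value
# within sampling error. Reuses sample_payoff as in the DicePoker test;
# the tolerance is a guess.
def test_best_response_goofspiel_sampled():
    game = Goofspiel(3, Goofspiel.Scoring.ZEROSUM)
    br = BestResponse(game, 0, [UniformStrategy()] * 2)
    payoff = sample_payoff(game, [br, UniformStrategy()], 10000, seed=7)[0]
    assert payoff[0] == pytest.approx(4 / 3, abs=0.1)
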
def non_test_goofspiel():
    # Exploratory benchmark; the name intentionally lacks the "test_" prefix
    # so pytest does not collect it.
    g = Goofspiel(4, scoring=Goofspiel.Scoring.ZEROSUM)
    mc = OutcomeMCCFR(g, seed=42)
    for s in [10, 100, 1000]:
        mc.compute(s)
        br = BestResponse(g, 0, [None, mc])
        print("Exploit after", s,
              np.mean([
                  g.play_strategies([br, mc], seed=i)[-1].values()[0]
                  for i in range(1000)
              ]))
    vs = GoofSpielCardsValueStore(g)
    val = SparseStochasticValueLearning(g, vs, seed=43)
    for alpha in [0.1, 0.01, 0.01, 0.001, 0.0001]:
        print(alpha)
        val.compute([mc, mc], 200, alpha)

def test_best_response_limit():
    game = Goofspiel(3)
    # Unlimited traversal succeeds on the full tree.
    BestResponse(game, 0, [UniformStrategy()] * 2)
    with pytest.raises(LimitExceeded, match="traversed more than"):
        BestResponse(game, 0, [UniformStrategy()] * 2, max_nodes=1024)