Exemple #1
0
def test_best_response_rps():
    """Best response to a fixed R/P/S strategy with weights [1, 0, 0].

    For each seat (0 and 1) the opponent is given the fixed strategy, and
    the first entry of ``best_responses`` must put probability 1.0 on "P"
    and 0.0 on "R" and "S"; the best-response value must be about 1.0.
    """
    rock_only = FixedStrategy(Explicit([1, 0, 0], values=["R", "P", "S"]))
    game = RockPaperScissors()
    expected_probs = {"R": 0.0, "P": 1.0, "S": 0.0}

    # (responding player, opponent holding the fixed strategy)
    for player, opponent in ((0, 1), (1, 0)):
        response = BestResponse(game, player, {opponent: rock_only})
        first_dist = next(iter(response.best_responses.values()))
        for action, prob in expected_probs.items():
            assert first_dist.probability(action) == prob
        assert response.value == pytest.approx(1.0)
Exemple #2
0
def test_best_response_rps():
    """Best response to ``ConstStrategy((1, 0, 0))`` from either seat.

    The first entry of ``best_responses`` must be approximately
    ``(0.0, 1.0, 0.0)`` and the best-response value approximately 1.0.
    """
    fixed = ConstStrategy((1, 0, 0))
    game = RockPaperScissors()
    for player in (0, 1):
        # Both seats get the same strategy object, as in a full profile.
        response = BestResponse(game, player, [fixed, fixed])
        first_dist = tuple(response.best_responses.values())[0]
        assert first_dist == pytest.approx((0.0, 1.0, 0.0))
        assert response.value == pytest.approx(1.0)
Exemple #3
0
def test_best_response_goofspiel():
    """Best response of player 0 to a uniform player 1 in zero-sum Goofspiel.

    Checked for 3 cards (expected value about 4/3) and 4 cards (about 2.5).
    Whenever the last element of ``key[1]`` is among a response
    distribution's values, that value must have probability 1.0.
    """
    expected_values = {3: 4 / 3, 4: 2.5}
    for n_cards, expected in expected_values.items():
        game = Goofspiel(n_cards, Goofspiel.Scoring.ZEROSUM)
        response = BestResponse(game, 0, {1: UniformStrategy()})
        for key, dist in response.best_responses.items():
            # presumably the card currently up for grabs -- confirm against
            # the information-set key layout
            reward = key[1][-1]
            if reward in dist.values():
                assert dist.probability(reward) == 1.0
        assert response.value == pytest.approx(expected)
Exemple #4
0
def test_mccfr_dicepoker():
    """Bound the exploitability of OutcomeMCCFR strategies on DicePoker.

    After 10000 iterations (``burn=0.5``) a best response is computed for
    each seat; its value must stay within a fixed bound and agree (within
    0.05) with the payoff sampled over 10000 plays against the MCCFR
    strategy of the other seat.
    """
    game = DicePoker()
    solver = OutcomeMCCFR(game, seed=52)
    solver.compute(10000, burn=0.5)

    # Seat 0 best-responds to the MCCFR strategy in seat 1.
    response0 = BestResponse(game, 0, solver.strategies)
    assert response0.value < 0.3
    sampled0 = sample_payoff(
        game, [response0, solver.strategies[1]], 10000, seed=3)[0]
    assert response0.value == pytest.approx(sampled0[0], abs=0.05)

    # Seat 1 best-responds to the MCCFR strategy in seat 0.
    response1 = BestResponse(game, 1, solver.strategies)
    assert response1.value > -0.2
    sampled1 = sample_payoff(
        game, [solver.strategies[0], response1], 10000, seed=4)[0]
    assert response1.value == pytest.approx(sampled1[1], abs=0.05)

    print(response0.value, response1.value, sampled0, sampled1)
    # All three checks below look at player 0's sampled payoff.
    assert sampled0[0] > sampled1[0]
    assert sampled0[0] < 0.3
    assert sampled1[0] > 0.1
Exemple #5
0
def test_onecardpoker_mc():
    """Check MCCFR's seat-0 strategy on OneCardPoker against a best response.

    After 1000 MCCFR iterations, player 0's payoff averaged over 1000
    seeded plays against player 1's best response must exceed -0.4.
    """
    game = OneCardPoker()
    solver = OutcomeMCCFR(game, seed=56)
    solver.compute(1000)
    responder = BestResponse(game, 1, solver.strategies)

    payoffs = []
    for seed in range(1000):
        outcome = play_strategies(
            game, [solver.strategies[0], responder], seed=seed)
        payoffs.append(outcome.payoff[0])
    assert np.mean(payoffs) > -0.4
Exemple #6
0
def test_best_response_goofspiel():
    """Best response of player 0 to uniform play in zero-sum Goofspiel.

    Checked for 3 cards (expected value about 4/3) and 4 cards (about 2.5).
    For every best-response entry, either the key's last element already
    occurs in ``key[0::3]`` or the distribution puts probability 1.0 on the
    index computed below.
    """
    for n_cards, expected in ((3, 4 / 3), (4, 2.5)):
        game = Goofspiel(n_cards, Goofspiel.Scoring.ZEROSUM)
        uniform = UniformStrategy()
        # The same strategy object fills both seats.
        response = BestResponse(game, 0, [uniform, uniform])
        for key, dist in response.best_responses.items():
            reward = key[-1]
            # presumably the cards this player has already played (every
            # third history entry) -- confirm against the key layout
            played = key[0::3]
            # Count of cards below `reward` that are not in `played`;
            # presumably the position of `reward` among remaining actions.
            idx = sum(
                1 for card in range(n_cards)
                if card < reward and card not in played
            )
            if reward not in played:
                assert dist[idx] == 1.0
        assert response.value == pytest.approx(expected)
Exemple #7
0
def non_test_goofspiel():
    """Exploratory run on 4-card zero-sum Goofspiel (not collected by pytest).

    Runs OutcomeMCCFR in successive batches of 10, 100 and 1000 calls to
    ``compute``, printing after each batch the mean result of 1000 seeded
    plays of a best response against the MCCFR strategy. Then runs
    SparseStochasticValueLearning over a schedule of alpha values.
    """
    game = Goofspiel(4, scoring=Goofspiel.Scoring.ZEROSUM)
    solver = OutcomeMCCFR(game, seed=42)
    for iterations in (10, 100, 1000):
        solver.compute(iterations)
        responder = BestResponse(game, 0, [None, solver])
        # NOTE(review): `.values()[0]` only works if `values()` returns an
        # indexable object (not a dict view) -- confirm against the API.
        results = [
            game.play_strategies([responder, solver], seed=i)[-1].values()[0]
            for i in range(1000)
        ]
        print("Exploit after", iterations, np.mean(results))

    store = GoofSpielCardsValueStore(game)
    learner = SparseStochasticValueLearning(game, store, seed=43)
    # NOTE(review): 0.01 appears twice in the schedule -- confirm intended.
    for alpha in (0.1, 0.01, 0.01, 0.001, 0.0001):
        print(alpha)
        learner.compute([solver, solver], 200, alpha)
Exemple #8
0
def test_best_response_limit():
    """BestResponse must raise LimitExceeded when ``max_nodes`` is too small.

    The first call, without ``max_nodes``, must succeed on 3-card Goofspiel.
    The second call caps traversal at 1024 nodes and must raise
    ``LimitExceeded``.
    """
    game = Goofspiel(3)
    BestResponse(game, 0, [UniformStrategy()] * 2)
    # `match=` checks the exception text with a regex search. The previous
    # `message=` keyword never inspected the exception (it was only the
    # failure text when nothing was raised) and was removed in pytest 5.0,
    # where passing it raises TypeError.
    # NOTE(review): assumes the LimitExceeded message contains
    # "traversed more than" -- confirm against the raising code.
    with pytest.raises(LimitExceeded, match="traversed more than"):
        BestResponse(game, 0, [UniformStrategy()] * 2, max_nodes=1024)