예제 #1
0
def test_backup_with_ucb():
    """Check ``BanditNode.ucb_value`` after backing up two grandchild rewards.

    The root and its first child are expanded, -1 is backed up through the
    first grandchild and +1 through the second. The UCB value (second
    argument ``1``) of the first child and of both grandchildren is then
    compared against reference values.
    """
    import math  # local import: only needed for the tolerant float checks

    root = BanditNode()
    state = FakeGameState()
    root.expand(state)
    children = root.child_nodes()

    state.play(children[0].action)
    children[0].expand(state)

    children[0].child_nodes()[0].backup(-1)
    children[0].child_nodes()[1].backup(1)

    # The references are full-precision floats. Comparing with a relative
    # tolerance keeps the test from failing on last-bit rounding differences
    # between platforms or math-library versions.
    assert math.isclose(
        BanditNode.ucb_value(children[0], 1), 0.8325546111576977)
    assert math.isclose(
        BanditNode.ucb_value(children[0].child_nodes()[0], 1),
        0.17741002251547466)
    assert math.isclose(
        BanditNode.ucb_value(children[0].child_nodes()[1], 1),
        2.177410022515475)
예제 #2
0
def test_backup_with_ucb_explore():
    """Check ``UctNode.value()`` around two backups through grandchildren.

    After +1 is backed up through the first grandchild, the first child is
    expected to report -1 and that grandchild 1, while the second child and
    the second grandchild are expected to report infinity. After -1 is then
    backed up through the second grandchild, the first child and the two
    grandchildren must exceed 0, 1 and -1 respectively.
    """
    tree_root = UctNode(1)
    game = FakeGameState()
    tree_root.expand(game)
    top_level = tree_root.child_nodes()
    first = top_level[0]

    game.play(first.action)
    first.expand(game)

    def grandchild(index):
        # Looks the node up afresh on every access instead of caching the
        # list returned by child_nodes().
        return first.child_nodes()[index]

    # First backup: +1 through grandchild 0.
    grandchild(0).backup(1)
    assert first.value() == -1
    assert top_level[1].value() == float("inf")
    assert grandchild(0).value() == 1
    assert grandchild(1).value() == float("inf")

    # Second backup: -1 through grandchild 1.
    grandchild(1).backup(-1)
    assert first.value() > 0
    assert grandchild(0).value() > 1
    assert grandchild(1).value() > -1
예제 #3
0
def test_backup_with_value():
    """Check ``BanditNode.value()`` around two backups through grandchildren.

    After +1 is backed up through the first grandchild, the first child is
    expected to report -1 and that grandchild 1, while the second child and
    the second grandchild report 0. After -1 is then backed up through the
    second grandchild, the first child is expected to report 0 and the two
    grandchildren 1 and -1.
    """
    tree_root = BanditNode()
    game = FakeGameState()
    tree_root.expand(game)
    top_level = tree_root.child_nodes()
    first = top_level[0]

    game.play(first.action)
    first.expand(game)

    def grandchild(index):
        # Looks the node up afresh on every access instead of caching the
        # list returned by child_nodes().
        return first.child_nodes()[index]

    # First backup: +1 through grandchild 0.
    grandchild(0).backup(1)
    assert first.value() == -1
    assert top_level[1].value() == 0
    assert grandchild(0).value() == 1
    assert grandchild(1).value() == 0

    # Second backup: -1 through grandchild 1.
    grandchild(1).backup(-1)
    assert first.value() == 0
    assert grandchild(0).value() == 1
    assert grandchild(1).value() == -1
예제 #4
0
def test_backup_with_value():
    """Check ``RaveNode`` visit counts and values around two backups.

    The root and its first child are expanded, then -1 and +1 are backed up
    through the first and second grandchild, each time passing the same
    ``rave_moves`` mapping. After each backup the grandchildren's
    ``rave_num_visits`` and the ``value()`` of the involved nodes are
    compared against reference values.
    """
    import math  # local import: only needed for the tolerant float checks

    rave_moves = {0: [0]}
    root = RaveNode(1, 300)
    state = FakeGameState()
    root.expand(state)

    children = root.child_nodes()
    state.play(children[0].action)
    children[0].expand(state)

    children[0].child_nodes()[0].backup(-1, rave_moves)
    assert children[0].child_nodes()[0].rave_num_visits == 1
    assert children[0].child_nodes()[1].rave_num_visits == 0
    # Computed floats are compared with a relative tolerance so that
    # last-bit rounding differences do not break the test. INF and the
    # integer results are still compared exactly.
    assert math.isclose(children[0].value(), 0.0033333333333332993)
    assert children[1].value() == INF
    assert children[0].child_nodes()[0].value() == -1
    assert children[0].child_nodes()[1].value() == INF

    children[0].child_nodes()[1].backup(1, rave_moves)
    assert children[0].child_nodes()[0].rave_num_visits == 2
    assert children[0].child_nodes()[1].rave_num_visits == 0
    assert math.isclose(children[0].value(), 0.8325546111576977)
    assert math.isclose(
        children[0].child_nodes()[0].value(), 1.1740766891821415)
    assert math.isclose(
        children[0].child_nodes()[1].value(), 1.1807433558488079)
0
def test_backup():
    """Check ``RaveNode.avg_reward()`` around two backups through grandchildren.

    ``RaveNode.enable_rave`` is called with the game state before the root
    is expanded. After -1 is backed up through the first grandchild, that
    grandchild is expected to average -1, its sibling 0 and the first child
    1. After +1 is backed up through the second grandchild, the
    grandchildren are expected to average -1 and 1 and the first child 0.
    Both times the second child must still have no children and average 0.
    """
    moves = {0: [0]}
    tree_root = RaveNode(1, 300)
    game = FakeGameState()
    RaveNode.enable_rave(game)
    tree_root.expand(game)
    top_level = tree_root.child_nodes()
    first = top_level[0]

    game.play(first.action)
    first.expand(game)

    def grandchild(index):
        # Looks the node up afresh on every access instead of caching the
        # list returned by child_nodes().
        return first.child_nodes()[index]

    # First backup: -1 through grandchild 0.
    grandchild(0).backup(-1, moves)
    assert grandchild(0).avg_reward() == -1
    assert grandchild(1).avg_reward() == 0

    assert first.avg_reward() == 1
    assert len(top_level[1].child_nodes()) == 0
    assert top_level[1].avg_reward() == 0

    # Second backup: +1 through grandchild 1.
    grandchild(1).backup(1, moves)
    assert grandchild(0).avg_reward() == -1
    assert grandchild(1).avg_reward() == 1
    assert first.avg_reward() == 0
    assert len(top_level[1].child_nodes()) == 0
    assert top_level[1].avg_reward() == 0