コード例 #1
0
def test_mcts_can_self_play_noughts_and_crosses():
    """Play one game between two MCTS players and check the trajectory.

    Both players are built from the same trivial estimator. The test checks
    that `play` returns one action per state transition, that the first
    state is the game's initial state and that the last state is terminal.
    """
    game = NoughtsAndCrosses()
    shared_estimator = create_trivial_estimator(game)

    # NOTE(review): 100 and 0.5 are passed positionally; presumably the MCTS
    # iteration count and c_puct -- confirm against MCTSPlayer's signature.
    players = {
        player_no: MCTSPlayer(game, shared_estimator, 100, 0.5)
        for player_no in (1, 2)
    }

    moves, states, _utility = play(game, players)

    # Every action leads from one recorded state to the next.
    assert len(states) - 1 == len(moves)
    assert states[0] == game.initial_state
    assert game.is_terminal(states[-1])
コード例 #2
0
def test_backwards_induction_on_nac_o_plays_top_right():
    """Check the best action backwards induction picks for a fixed position.

    The expected best action for the hand-built state below is (2, 2).
    """
    game = NoughtsAndCrosses()

    # NOTE(review): the state is a triple of two 9-bit masks and an int;
    # presumably one board mask per player plus the player to move -- confirm
    # against NoughtsAndCrosses.
    position = (0b011000000, 0b000001000, 1)

    _utility, chosen_action = backwards_induction.backwards_induction(
        game, position)

    assert chosen_action == (2, 2)
コード例 #3
0
def test_neural_net_estimate_game_state():
    """Smoke test: call a NACNetEstimator on a random (7, 9) array.

    There are no assertions; the test passes as long as construction and the
    call complete without raising.
    """
    game = NoughtsAndCrosses()
    estimator = NACNetEstimator(
        learning_rate=0.01,
        l2_weight=0.1,
        action_indices=game.action_indices,
    )

    random_states = np.random.randn(7, 9)

    # The result is intentionally not inspected.
    _result = estimator(random_states)
コード例 #4
0
def test_backwards_induction_on_nac():
    """Solve the game from a fixed position and check two best actions.

    `solve_game` is handed an empty dict; the assertions afterwards read
    entries out of that same dict, so it is expected to be filled in place.
    """
    game = NoughtsAndCrosses()
    root = (0b001000000, 0b000000000, 2)

    solved = {}
    backwards_induction.solve_game(solved, game, root)
    print(solved)

    # Best action at the position the solve started from.
    assert solved[root] == (1, 1)

    # Best action at a second position looked up in the same solved table.
    follow_up = (0b001000000, 0b000010000, 1)
    assert solved[follow_up] == (0, 1)
コード例 #5
0
def test_evaluator_on_noughts_and_crosses():
    """Run `evaluate` for two distinct MCTS players with a fixed numpy seed.

    The only assertion is that the two players are separate objects; the
    call to `evaluate` is a smoke test whose result is not inspected.
    """
    np.random.seed(0)

    game = NoughtsAndCrosses()
    shared_estimator = create_trivial_estimator(game)
    first = MCTSPlayer(game, shared_estimator, 100, 0.5)
    second = MCTSPlayer(game, shared_estimator, 100, 0.5)

    # The two players must be different objects even though they are built
    # from identical arguments.
    assert first is not second

    # The final positional argument is 20.
    # NOTE(review): presumably the number of games -- confirm against
    # evaluate's signature.
    _player1_results = evaluate(game, {1: first, 2: second}, 20)
コード例 #6
0
def test_nac_net_call():
    """Call a NACNetEstimator on an all-zero 9-tuple and check the output.

    The call must return a pair whose first element is a dict with exactly
    nine entries.
    """
    np.random.seed(0)
    game = NoughtsAndCrosses()
    estimator = NACNetEstimator(
        learning_rate=0.01,
        l2_weight=0.1,
        action_indices=game.action_indices,
    )

    zero_state = (0, ) * 9

    # The second element of the returned pair is not checked here.
    probs_dict, _value = estimator(zero_state)

    assert isinstance(probs_dict, dict)
    assert len(probs_dict) == 9
コード例 #7
0
def test_initialising_basic_net_with_random_parameters():
    """Smoke test: evaluate the loss tensor of a freshly built estimator.

    Feeds a (7, 9) array of ones as the state vector together with random
    `pi` and `outcomes` arrays. The computed loss is discarded; the test
    passes if the session run does not raise.
    """
    # TODO: redo this on mock game
    game = NoughtsAndCrosses()
    estimator = NACNetEstimator(
        learning_rate=0.01,
        l2_weight=0.1,
        action_indices=game.action_indices,
    )

    # State input of all 1s; the two targets are drawn uniformly from [0, 1).
    all_ones_states = np.ones((7, 9))
    random_pis = np.random.rand(7, 9)
    random_outcomes = np.random.rand(7, 1)

    feed = {
        estimator.tensors['state_vector']: all_ones_states,
        estimator.tensors['pi']: random_pis,
        estimator.tensors['outcomes']: random_outcomes,
    }
    estimator.sess.run(estimator.tensors['loss'], feed_dict=feed)
コード例 #8
0
def test_can_use_two_neural_nets():
    """Two separately constructed estimators give different outputs.

    Both nets are built with identical arguments and called on the same
    random (1, 9) input, so any difference in output shows the nets do not
    share parameters.
    """
    np.random.seed(0)
    game = NoughtsAndCrosses()

    # Build both nets before drawing the input, in that order.
    first_net, second_net = (
        NACNetEstimator(learning_rate=0.01,
                        l2_weight=0.1,
                        action_indices=game.action_indices)
        for _ in range(2)
    )

    shared_input = np.random.randn(1, 9)

    first_probs, first_value = first_net(shared_input)
    second_probs, second_value = second_net(shared_input)

    # Same input, different nets: both outputs are expected to differ.
    assert first_probs != second_probs
    assert first_value != second_value
コード例 #9
0
def test_basic_nac_net_tensor_shapes():
    """Evaluate the estimator's tensors on a random batch and check them.

    Checks that the probability loss, the value loss and the total loss are
    all strictly positive, that the total loss is a scalar, and that the
    `probs` and `value` outputs have shapes (batch_size, 9) and
    (batch_size, 1) respectively.
    """
    np.random.seed(0)
    game = NoughtsAndCrosses()
    estimator = NACNetEstimator(
        learning_rate=0.01,
        l2_weight=0.1,
        action_indices=game.action_indices,
    )

    batch_size = 5

    # Random inputs and targets, drawn in a fixed order after seeding.
    state_batch = np.random.randn(batch_size, 9)
    pi_batch = np.random.rand(batch_size, 9)
    z_batch = np.random.randn(batch_size, 1)

    fetch_names = ('loss', 'loss_probs', 'loss_value', 'probs', 'value')
    fetches = [estimator.tensors[name] for name in fetch_names]
    feed = {
        estimator.tensors['state_vector']: state_batch,
        estimator.tensors['pi']: pi_batch,
        estimator.tensors['outcomes']: z_batch,
    }

    # Results come back in the same order as `fetch_names`.
    loss, loss_probs, loss_value, probs, value = estimator.sess.run(
        fetches, feed_dict=feed)

    # Each loss term, and their total, should be positive.
    assert loss_probs > 0
    assert loss_value > 0
    assert loss > 0

    # The total loss is a scalar; the outputs have one row per batch item.
    assert np.shape(loss) == ()
    assert np.shape(probs) == (batch_size, 9)
    assert np.shape(value) == (batch_size, 1)
コード例 #10
0
import time

from alphago.games import NoughtsAndCrosses
from alphago.estimator import NAC3x6NetEstimator
from alphago.alphago import train_alphago
from alphago.utilities import memoize_instance

# Label for this experiment and the learning rate read by create_estimator.
game_name = 'noughts_and_crosses'
learning_rate = 0.1

# Noughts and crosses on a 3-row by 6-column board. memoize_instance is
# applied to the game object right after construction.
game = NoughtsAndCrosses(rows=3, columns=6)
memoize_instance(game)


def create_estimator():
    """Return a new NAC3x6NetEstimator built from the module-level settings.

    Reads the module-level `learning_rate` and `game.action_indices` at call
    time; the L2 weight is fixed at 0.00001.
    """
    estimator_kwargs = {
        'learning_rate': learning_rate,
        'action_indices': game.action_indices,
        'l2_weight': 0.00001,
    }
    return NAC3x6NetEstimator(**estimator_kwargs)


# Experiment hyperparameters, grouped by what their names suggest.
# NOTE(review): presumably passed on to train_alphago, but no call is visible
# in this section -- confirm against the code that consumes them.

# Tree-search settings.
mcts_iters = 30
c_puct = 1.0

# Self-play and training schedule.
alphago_steps = 2000
self_play_iters = 20
training_iters = 1000
batch_size = 32
replay_length = 10000

# Evaluation settings.
evaluate_every = 5
num_evaluate_games = 50
win_rate = 0.55

# Timestamped label of the form 'experiment-YYYY-MM-DD_HH:MM:SS', taken from
# the local time at import.
current_time_format = time.strftime('experiment-%Y-%m-%d_%H:%M:%S')
コード例 #11
0
from alphago.player import MCTSPlayer, RandomPlayer, OptimalPlayer
from alphago.games import NoughtsAndCrosses, connect_four
from alphago.utilities import memoize_instance
from alphago.estimator import create_trivial_estimator
from alphago.evaluator import evaluate, play

# Script: pit two identically configured MCTS players against each other.
# NoughtsAndCrosses is constructed with positional arguments (3, 6); the
# memoize_instance(nac) call is left disabled here.
nac = NoughtsAndCrosses(3, 6)

trivial_estimator = create_trivial_estimator(nac)

# Both players share the same estimator and settings. player2 is constructed
# first, then player1.
# NOTE(review): 30, 0.5 and 0.01 are positional; confirm their meaning
# against MCTSPlayer's signature.
player2, player1 = (
    MCTSPlayer(nac, trivial_estimator, 30, 0.5, 0.01) for _ in range(2)
)

# The final positional argument is 1000.
# NOTE(review): presumably the number of games -- confirm against evaluate.
evaluate(
    nac,
    {2: player2, 1: player1},
    1000,
)
コード例 #12
0
from alphago.games import NoughtsAndCrosses
from alphago.estimator import NACNetEstimator
from alphago.alphago import train_alphago

from alphago.utilities import memoize_instance

# Default-constructed noughts and crosses game; memoize_instance is applied
# to the game object right after construction.
game = NoughtsAndCrosses()
memoize_instance(game)

# Learning rate read by create_estimator.
learning_rate = 0.0001


def create_estimator():
    """Return a new NACNetEstimator built from the module-level settings.

    Reads the module-level `learning_rate` and `game.action_indices` at call
    time; the L2 weight is fixed at 0.1.
    """
    estimator_kwargs = {
        'learning_rate': learning_rate,
        'action_indices': game.action_indices,
        'l2_weight': 0.1,
    }
    return NACNetEstimator(**estimator_kwargs)


# Experiment hyperparameters, grouped by what their names suggest.
# NOTE(review): presumably arguments for the train_alphago call that follows;
# that call is cut off in this section -- confirm against it.

# Tree-search settings.
mcts_iters = 30
c_puct = 1.0

# Self-play and training schedule.
alphago_steps = 2000
self_play_iters = 20
training_iters = 1000
replay_length = 10000

# Evaluation settings.
evaluate_every = 5
num_evaluate_games = 30
win_rate = 0.8

# Checkpoint settings; pre_step is unset (None).
checkpoint_path = 'checkpoints/'
pre_step = None

losses = train_alphago(game,