def test_priornet_tictactoe_self_play(self):
    """At temperature 0, PriorNet-guided self-play on TicTacToe is deterministic:
    check the recorded player sequence, states, and MCTS policies ply by ply."""
    game = TicTacToe()
    net = NeuralNetwork(game, PriorNet)
    trainer = Trainer(game, net, num_simulations=2, num_games=1,
                      num_updates=0, buffer_size_limit=None, cpuct=1,
                      num_threads=4)
    data = trainer.self_play(temperature=0)

    # Last column records the player to move; X (+1) and O (-1) alternate.
    np.testing.assert_equal(data[:, -1], np.array([1, -1, 1, -1, 1, -1, 1]))

    # Action applied before each recorded ply (None for the initial state)
    # and the policy vector recorded for that ply.
    # NOTE(review): the policy vector shrinks by one entry per ply —
    # presumably it spans only the currently legal actions; confirm
    # against Trainer.self_play.
    expected = [
        (None,                                        [0, 1, 0, 0, 0, 0, 0, 0, 0]),
        (np.array([[0, 1, 0], [0, 0, 0], [0, 0, 0]]), [1, 0, 0, 0, 0, 0, 0, 0]),  # Top-middle X
        (np.array([[1, 0, 0], [0, 0, 0], [0, 0, 0]]), [1, 0, 0, 0, 0, 0, 0]),     # Top-left O
        (np.array([[0, 0, 1], [0, 0, 0], [0, 0, 0]]), [1, 0, 0, 0, 0, 0]),        # Top-right X
        (np.array([[0, 0, 0], [1, 0, 0], [0, 0, 0]]), [1, 0, 0, 0, 0]),           # Mid-left O
        (np.array([[0, 0, 0], [0, 1, 0], [0, 0, 0]]), [1, 0, 0, 0]),              # Mid-mid X
        (np.array([[0, 0, 0], [0, 0, 1], [0, 0, 0]]), [1, 0, 0]),                 # Mid-right O
    ]
    state = game.get_initial_state()
    for ply, (action, policy) in enumerate(expected):
        if action is not None:
            state = game.take_action(state, action)
        np.testing.assert_equal(data[ply, 0], state)
        np.testing.assert_equal(data[ply, 1], np.array(policy))
def test_policy_iteration(self):
    """After policy iteration over 100 self-play games, the training buffer
    should contain exactly one initial-state sample per game."""
    game = TicTacToe()
    net = NeuralNetwork(game, PriorNet)
    trainer = Trainer(game, net, num_simulations=2, num_games=100,
                      num_updates=0, buffer_size_limit=None, cpuct=1,
                      num_threads=4)
    trainer.policy_iteration()

    # Column 0 of the buffer holds board states; every game starts from the
    # initial state, so it must appear exactly num_games times.
    count = sum(
        1
        for s in trainer.training_data[:, 0]
        if (s.astype(np.float32) == game.get_initial_state()).all()
    )
    self.assertEqual(count, 100)