예제 #1
0
    def test_priornet_tictactoe_self_play(self):
        """Check the data recorded by a single PriorNet self-play game of TicTacToe.

        One game is generated with ``temperature=0`` and then replayed move by
        move through ``take_action``.  For every recorded row, column 0 is
        compared against the replayed state and column 1 against a one-hot
        vector; the last column is compared against an alternating 1 / -1
        sequence.
        """
        game = TicTacToe()
        network = NeuralNetwork(game, PriorNet)
        trainer = Trainer(
            game, network, num_simulations=2, num_games=1, num_updates=0,
            buffer_size_limit=None, cpuct=1, num_threads=4,
        )
        data = trainer.self_play(temperature=0)

        def one_hot(length, hot_index):
            # Integer vector of the given length with a single 1 at hot_index.
            vector = np.zeros(length, dtype=int)
            vector[hot_index] = 1
            return vector

        def mark(row, col):
            # 3x3 integer action array with a single 1 at the chosen cell.
            board = np.zeros((3, 3), dtype=int)
            board[row, col] = 1
            return board

        # Seven rows are recorded; the last column flips sign on every row
        # (presumably the outcome seen from the player to move -- confirm
        # against Trainer.self_play).
        np.testing.assert_equal(data[:, -1], np.array([1, -1, 1, -1, 1, -1, 1]))

        # Row 0 holds the empty board; its one-hot has the 1 at index 1 of 9.
        state = game.get_initial_state()
        np.testing.assert_equal(data[0, 0], state)
        np.testing.assert_equal(data[0, 1], one_hot(9, 1))

        # Cells played in order, as (row, col) on the 3x3 board.
        moves = [
            (0, 1),  # Top-middle X
            (0, 0),  # Top-left O
            (0, 2),  # Top-right X
            (1, 0),  # Mid-left O
            (1, 1),  # Mid-mid X
            (1, 2),  # Mid-right O
        ]
        for step, (row, col) in enumerate(moves, start=1):
            state = game.take_action(state, mark(row, col))
            np.testing.assert_equal(data[step, 0], state)
            # The expected vector shrinks by one entry per move and always
            # has its 1 in the first slot from here on.
            np.testing.assert_equal(data[step, 1], one_hot(9 - step, 0))
예제 #2
0
 def test_policy_iteration(self):
     """Check that policy iteration over 100 games records 100 initial states.

     After ``policy_iteration`` runs, column 0 of ``training_data`` is scanned
     and every entry equal to the game's initial state is counted; the count
     must match the configured ``num_games``.
     """
     game = TicTacToe()
     network = NeuralNetwork(game, PriorNet)
     trainer = Trainer(
         game, network, num_simulations=2, num_games=100, num_updates=0,
         buffer_size_limit=None, cpuct=1, num_threads=4,
     )
     trainer.policy_iteration()

     recorded_states = trainer.training_data[:, 0]
     # Each stored state is cast to float32 before the element-wise
     # comparison with the initial board.
     initial_count = sum(
         1
         for state in recorded_states
         if (state.astype(np.float32) == game.get_initial_state()).all()
     )
     self.assertEqual(initial_count, 100)