Code example #1
    def test_save_restore(self):
        # Set up training data
        td = training_data.training_data()
        td.add(np.ones([1, 4, 4]), 0, 4, np.zeros([1, 4, 4]))
        td.add(np.zeros([1, 4, 4]), 1, 2, np.ones([1, 4, 4]))
        td.add(np.ones([1, 4, 4]), 2, 16, np.zeros([1, 4, 4]))
        td.add(np.zeros([1, 4, 4]), 3, 2, np.ones([1, 4, 4]))

        temp_dir = tempfile.mkdtemp()
        temp_filename = os.path.join(temp_dir, 'data.csv')
        td.export_csv(temp_filename)

        td2 = training_data.training_data()
        td2.import_csv(temp_filename)

        expected_x = np.array(
            [[[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]],
             [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
             [[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]],
             [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]],
            dtype=int)
        expected_y_digit = np.array([[0], [1], [2], [3]], dtype=int)
        expected_reward = np.array([[4], [2], [16], [2]], dtype=float)
        expected_next_x = np.array(
            [[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
             [[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]],
             [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
             [[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]]],
            dtype=int)
        assert np.array_equal(td2.get_x(), expected_x)
        assert np.array_equal(td2.get_y_digit(), expected_y_digit)
        assert np.allclose(td2.get_reward(), expected_reward)
        assert np.array_equal(td2.get_next_x(), expected_next_x)
        os.remove(temp_filename)
        os.rmdir(temp_dir)
Code example #2
    def test_smooth_rewards(self):
        # Set up training data
        td = training_data.training_data()
        td.add(np.ones([1, 4, 4]), 0, 4, np.zeros([1, 4, 4]))
        td.add(np.ones([1, 4, 4]), 1, 2, np.zeros([1, 4, 4]))
        td.add(np.ones([1, 4, 4]), 2, 16, np.zeros([1, 4, 4]))
        td.add(np.ones([1, 4, 4]), 3, 2, np.zeros([1, 4, 4]))

        # Test using default lambda value of 0.9
        td2 = td.copy()
        td2.smooth_rewards()
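        # Discounted back to front with lambda 0.9:
        # 2.0, 16 + 0.9*2 = 17.8, 2 + 0.9*17.8 = 18.02, 4 + 0.9*18.02 = 20.218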
        expected_reward = np.array([[20.218], [18.02], [17.8], [2.0]],
                                   dtype=float)
        self.assertTrue(np.allclose(td2.get_reward(), expected_reward))

        # Test using lambda value of 0, should have no effect on rewards
        td2 = td.copy()
        td2.smooth_rewards(llambda=0.0)
        expected_reward = np.array([[4], [2], [16], [2]], dtype=float)
        self.assertTrue(np.allclose(td2.get_reward(), expected_reward))

        # Test end of episode
        td3 = training_data.training_data()
        td3.add(np.ones([1, 4, 4]), 0, 4, np.zeros([1, 4, 4]), False)
        td3.add(np.ones([1, 4, 4]), 1, 2, np.zeros([1, 4, 4]), True)
        td3.add(np.ones([1, 4, 4]), 2, 16, np.zeros([1, 4, 4]), False)
        td3.add(np.ones([1, 4, 4]), 3, 2, np.zeros([1, 4, 4]), True)
        td3.smooth_rewards()
        expected_reward = np.array([[5.8], [2.0], [17.8], [2.0]],
                                   dtype=float)
        self.assertTrue(np.allclose(td3.get_reward(), expected_reward))
Code example #3
File: test_training_data.py  Project: tjussh/gym-2048
    def test_save_restore(self):
        # Set up training data
        td = training_data.training_data()
        td.add(np.ones([1, 4, 4]), 0, 4)
        td.add(np.zeros([1, 4, 4]), 1, 2)
        td.add(np.ones([1, 4, 4]), 2, 16)
        td.add(np.zeros([1, 4, 4]), 3, 2)

        f = tempfile.NamedTemporaryFile()
        td.export_csv(f.name)

        td2 = training_data.training_data()
        td2.import_csv(f.name)

        expected_x = np.array(
            [[[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]],
             [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
             [[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]],
             [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]],
            dtype=int)
        expected_y_digit = np.array([[0], [1], [2], [3]], dtype=int)
        expected_reward = np.array([[4], [2], [16], [2]], dtype=float)
        self.assertTrue(np.array_equal(td2.get_x(), expected_x))
        self.assertTrue(np.array_equal(td2.get_y_digit(), expected_y_digit))
        self.assertTrue(np.allclose(td2.get_reward(), expected_reward))
        os.remove(f.name)
Code example #4
 def test_normalize_rewards(self):
     # Test calculating mean and standard deviation
     td = training_data.training_data()
     td.add(np.ones([1, 4, 4]), 1, 4, np.zeros([1, 4, 4]))
     td.add(np.ones([1, 4, 4]), 2, 4, np.zeros([1, 4, 4]))
     td.add(np.ones([1, 4, 4]), 3, 8, np.zeros([1, 4, 4]))
     td.add(np.ones([1, 4, 4]), 0, 16, np.zeros([1, 4, 4]))
     td.normalize_rewards()
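     # Rewards [4, 4, 8, 16]: mean 8, population standard deviation sqrt(24) ~= 4.899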
     expected_reward = np.array([
         [-0.8165],
         [-0.8165],
         [0.],
         [1.633],
     ],
                                 dtype=float)
     self.assertTrue(np.allclose(td.get_reward(), expected_reward))
     # Test specifying mean and standard deviation
     td = training_data.training_data()
     td.add(np.ones([1, 4, 4]), 1, 4, np.zeros([1, 4, 4]))
     td.add(np.ones([1, 4, 4]), 2, 4, np.zeros([1, 4, 4]))
     td.add(np.ones([1, 4, 4]), 3, 8, np.zeros([1, 4, 4]))
     td.add(np.ones([1, 4, 4]), 0, 16, np.zeros([1, 4, 4]))
     td.normalize_rewards(mean=8, sd=1)
     expected_reward = np.array([
         [-4.],
         [-4.],
         [0.],
         [8.],
     ],
                                 dtype=float)
     self.assertTrue(np.allclose(td.get_reward(), expected_reward))
Code example #5
 def test_merge(self):
     td = training_data.training_data()
     td.add(np.ones([1, 4, 4]), 1, 16, np.zeros([1, 4, 4]))
     td2 = training_data.training_data()
     td2.add(np.zeros([1, 4, 4]), 2, 0, np.ones([1, 4, 4]))
     td.merge(td2)
     expected_x = np.array([
         [[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]],
         [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]
         ], dtype=int)
     expected_y_digit = np.array([
         [1],
         [2]
         ], dtype=int)
     expected_reward = np.array([
         [16],
         [0]
         ], dtype=float)
     expected_next_x = np.array([
         [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
         [[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]]
         ], dtype=int)
     assert np.array_equal(td.get_x(), expected_x)
     assert np.array_equal(td.get_y_digit(), expected_y_digit)
     assert np.allclose(td.get_reward(), expected_reward)
     assert np.array_equal(td.get_next_x(), expected_next_x)
Code example #6
    def test_get_discounted_return(self):
        # Set up training data
        td = training_data.training_data()
        td.add(np.ones([1, 4, 4]), 0, 4, np.zeros([1, 4, 4]))
        td.add(np.ones([1, 4, 4]), 1, 2, np.zeros([1, 4, 4]))
        td.add(np.ones([1, 4, 4]), 2, 16, np.zeros([1, 4, 4]))
        td.add(np.ones([1, 4, 4]), 3, 2, np.zeros([1, 4, 4]))

        # Test using default gamma value of 0.9
        td2 = td.copy()
        discounted_return = td2.get_discounted_return()
        expected_return = np.array([[20.218], [18.02], [17.8], [2.0]],
                                   dtype=float)
        assert np.allclose(discounted_return, expected_return)

        # Test using gamma value of 0, should have no effect on rewards
        td2 = td.copy()
        discounted_return = td2.get_discounted_return(gamma=0.0)
        expected_return = np.array([[4], [2], [16], [2]], dtype=float)
        assert np.allclose(discounted_return, expected_return)

        # Test end of episode
        td3 = training_data.training_data()
        td3.add(np.ones([1, 4, 4]), 0, 4, np.zeros([1, 4, 4]), False)
        td3.add(np.ones([1, 4, 4]), 1, 2, np.zeros([1, 4, 4]), True)
        td3.add(np.ones([1, 4, 4]), 2, 16, np.zeros([1, 4, 4]), False)
        td3.add(np.ones([1, 4, 4]), 3, 2, np.zeros([1, 4, 4]), True)
        discounted_return = td3.get_discounted_return()
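        # done=True ends an episode, so the running return restarts after rows 2 and 4:
        # 4 + 0.9*2 = 5.8, 2.0, 16 + 0.9*2 = 17.8, 2.0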
        expected_return = np.array([[5.8], [2.0], [17.8], [2.0]],
                                   dtype=float)
        assert np.allclose(discounted_return, expected_return)
Code example #7
File: test_training_data.py  Project: tjussh/gym-2048
 def test_split(self):
     td = training_data.training_data()
     td.add(np.ones([1, 4, 4]), 1, 16)
     td2 = training_data.training_data()
     td2.add(np.zeros([1, 4, 4]), 2, 0)
     td.merge(td2)
     a, b = td.split()
     self.assertTrue(np.array_equal(a.get_x(), np.ones([1, 4, 4])))
     self.assertTrue(np.array_equal(a.get_y_digit(), [[1]]))
     self.assertTrue(np.array_equal(b.get_x(), np.zeros([1, 4, 4])))
     self.assertTrue(np.array_equal(b.get_y_digit(), [[2]]))
Code example #8
 def test_split(self):
     td = training_data.training_data()
     td.add(np.ones([1, 4, 4]), 1, 16, np.zeros([1, 4, 4]))
     td2 = training_data.training_data()
     td2.add(np.zeros([1, 4, 4]), 2, 0, np.ones([1, 4, 4]))
     td.merge(td2)
     a, b = td.split()
     assert np.array_equal(a.get_x(), np.ones([1, 4, 4]))
     assert np.array_equal(a.get_y_digit(), [[1]])
     assert np.array_equal(a.get_reward(), [[16]])
     assert np.array_equal(a.get_next_x(), np.zeros([1, 4, 4]))
     assert np.array_equal(b.get_x(), np.zeros([1, 4, 4]))
     assert np.array_equal(b.get_y_digit(), [[2]])
     assert np.array_equal(b.get_reward(), [[0]])
     assert np.array_equal(b.get_next_x(), np.ones([1, 4, 4]))
Code example #9
def add_rewards_to_training_data(env, input_training_data):
    """Return a new training_data set with a reward attached to each (state, action) pair."""
    new_training_data = training_data.training_data()
    for n in range(input_training_data.size()):
        (state, action) = input_training_data.get_n(n)
        # get_reward_for_state_action is assumed to be defined elsewhere in this module
        reward = get_reward_for_state_action(env, state, action)
        new_training_data.add(state, action, reward)
    return new_training_data
Code example #10
 def test_hflip(self):
     td = training_data.training_data()
     board1 = np.array([[1, 1, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0],
                        [0, 0, 0, 0]])
     board2 = np.array([[0, 0, 0, 0], [2, 4, 0, 0], [0, 0, 0, 0],
                        [0, 0, 0, 0]])
     td.add(board1, 1, 2, board2)
     td.add(board2, 2, 0, board1)
     td.hflip()
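     # Boards are mirrored left-right; judging by the expected values below,
     # actions 1 and 3 swap while actions 0 and 2 are unchanged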
     expected_x = np.array(
         [[[0, 0, 1, 1], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
          [[0, 0, 0, 0], [0, 0, 4, 2], [0, 0, 0, 0], [0, 0, 0, 0]]],
         dtype=int)
     expected_y_digit = np.array([[3], [2]], dtype=int)
     expected_reward = np.array([
         [2],
         [0],
     ], dtype=float)
     expected_next_x = np.array(
         [[[0, 0, 0, 0], [0, 0, 4, 2], [0, 0, 0, 0], [0, 0, 0, 0]],
          [[0, 0, 1, 1], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]],
         dtype=int)
     assert np.array_equal(td.get_x(), expected_x)
     assert np.array_equal(td.get_y_digit(), expected_y_digit)
     assert np.allclose(td.get_reward(), expected_reward)
     assert np.allclose(td.get_next_x(), expected_next_x)
Code example #11
 def test_normalize_boards(self):
     # Test calculating mean and standard deviation
     td = training_data.training_data()
     td.add(np.full((1, 4, 4), 4), 1, 4, np.full((1, 4, 4), 8))
     td.add(np.full((1, 4, 4), 8), 2, 4, np.full((1, 4, 4), 16))
     td.add(np.full((1, 4, 4), 16), 3, 4, np.full((1, 4, 4), 32))
     td.add(np.full((1, 4, 4), 32), 4, 4, np.full((1, 4, 4), 64))
     td.normalize_boards()
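     # Cell values 4, 8, 16 and 32: mean 15, population standard deviation sqrt(115) ~= 10.7238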
     mean = 15.
     sd = 10.7238052947636
     a = (4. - mean) / sd
     b = (8. - mean) / sd
     c = (16. - mean) / sd
     d = (32. - mean) / sd
     e = (64. - mean) / sd
     expected_x = np.array(
         [[[a, a, a, a], [a, a, a, a], [a, a, a, a], [a, a, a, a]],
          [[b, b, b, b], [b, b, b, b], [b, b, b, b], [b, b, b, b]],
          [[c, c, c, c], [c, c, c, c], [c, c, c, c], [c, c, c, c]],
          [[d, d, d, d], [d, d, d, d], [d, d, d, d], [d, d, d, d]]],
         dtype=float)
     assert np.allclose(td.get_x(), expected_x)
     expected_next_x = np.array(
         [[[b, b, b, b], [b, b, b, b], [b, b, b, b], [b, b, b, b]],
          [[c, c, c, c], [c, c, c, c], [c, c, c, c], [c, c, c, c]],
          [[d, d, d, d], [d, d, d, d], [d, d, d, d], [d, d, d, d]],
          [[e, e, e, e], [e, e, e, e], [e, e, e, e], [e, e, e, e]]],
         dtype=float)
     assert np.allclose(td.get_next_x(), expected_next_x)
Code example #12
 def test_rotate(self):
     td = training_data.training_data()
     board1 = np.array([[1, 1, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0],
                        [0, 0, 0, 0]])
     board2 = np.array([[0, 0, 0, 0], [2, 4, 0, 0], [0, 0, 0, 0],
                        [0, 0, 0, 0]])
     td.add(board1, 1, 2, board2)
     td.add(board2, 2, 0, board1)
     td.rotate(3)
     expected_x = np.array(
         [[[0, 0, 0, 0], [0, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0]],
          [[0, 0, 0, 0], [0, 0, 0, 0], [0, 4, 0, 0], [0, 2, 0, 0]]],
         dtype=int)
     expected_y_digit = np.array([[0], [1]], dtype=int)
     expected_reward = np.array([
         [2],
         [0],
     ], dtype=float)
     expected_next_x = np.array(
         [[[0, 0, 0, 0], [0, 0, 0, 0], [0, 4, 0, 0], [0, 2, 0, 0]],
          [[0, 0, 0, 0], [0, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0]]],
         dtype=int)
     self.assertTrue(np.array_equal(td.get_x(), expected_x))
     self.assertTrue(np.array_equal(td.get_y_digit(), expected_y_digit))
     self.assertTrue(np.allclose(td.get_reward(), expected_reward))
     self.assertTrue(np.array_equal(td.get_next_x(), expected_next_x))
Code example #13
 def test_get_total_reward(self):
     td = training_data.training_data()
     td.add(np.ones([4, 4]), 0, 4, np.zeros([4, 4]))
     td.add(np.zeros([4, 4]), 1, 8, np.ones([4, 4]))
     td.add(np.zeros([4, 4]), 3, 16, np.ones([4, 4]))
     td.add(np.zeros([4, 4]), 2, 32, np.ones([4, 4]))
     assert td.get_total_reward() == 60
Code example #14
 def test_get_n(self):
     td = training_data.training_data()
     td.add(np.ones([4, 4]), 1, 4, np.zeros([4, 4]))
     td.add(np.zeros([4, 4]), 2, 8, np.ones([4, 4]))
     (state, action, reward, next_state, done) = td.get_n(1)
     assert np.array_equal(state, np.zeros([4, 4], dtype=int))
     assert action == 2
     assert reward == pytest.approx(8.)
     assert np.array_equal(next_state, np.ones([4, 4], dtype=int))
Code example #15
File: test_training_data.py  Project: tjussh/gym-2048
 def test_get_n(self):
     # Test get_n with reward
     td = training_data.training_data()
     td.add(np.ones([4, 4]), 1, 4)
     td.add(np.zeros([4, 4]), 2, 8)
     (state, action, reward) = td.get_n(1)
     self.assertTrue(np.array_equal(state, np.zeros([4, 4], dtype=int)))
     self.assertEqual(action, 2)
     self.assertAlmostEqual(reward, 8)
Code example #16
 def test_augment(self):
     td = training_data.training_data()
     initial_board = np.array([[1, 1, 0, 0],
                               [0, 0, 0, 0],
                               [0, 0, 0, 0],
                               [0, 0, 0, 0]])
     next_board = np.array([[0, 0, 0, 2],
                            [0, 2, 0, 0],
                            [0, 0, 0, 0],
                            [0, 0, 0, 0]])
     td.add(initial_board, 1, 4, next_board)
     td.augment()
     assert td.size() == 8
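     # One sample becomes 8: the original board plus its horizontal flip,
     # each rotated by 0, 90, 180 and 270 degrees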
     expected_x = np.array([
         [[1, 1, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
         [[0, 0, 1, 1], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
         [[0, 0, 0, 1], [0, 0, 0, 1], [0, 0, 0, 0], [0, 0, 0, 0]],
         [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 1], [0, 0, 0, 1]],
         [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 1]],
         [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 0, 0]],
         [[0, 0, 0, 0], [0, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0]],
         [[1, 0, 0, 0], [1, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]
         ], dtype=int)
     expected_y_digit = np.array([
         [1],
         [3],
         [2],
         [0],
         [3],
         [1],
         [0],
         [2]
         ], dtype=int)
     expected_reward = np.array([
         [4],
         [4],
         [4],
         [4],
         [4],
         [4],
         [4],
         [4]
         ], dtype=float)
     expected_next_x = np.array([
         [[0, 0, 0, 2], [0, 2, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], # Original
         [[2, 0, 0, 0], [0, 0, 2, 0], [0, 0, 0, 0], [0, 0, 0, 0]], # Hflip'd
         [[0, 0, 0, 0], [0, 0, 2, 0], [0, 0, 0, 0], [0, 0, 0, 2]], # Original, rotated 90 degrees
         [[0, 0, 0, 2], [0, 0, 0, 0], [0, 0, 2, 0], [0, 0, 0, 0]], # Hflip, rotated 90 degrees
         [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 2, 0], [2, 0, 0, 0]], # Original, rotated 180 degrees
         [[0, 0, 0, 0], [0, 0, 0, 0], [0, 2, 0, 0], [0, 0, 0, 2]], # Hflip, rotated 180 degrees
         [[2, 0, 0, 0], [0, 0, 0, 0], [0, 2, 0, 0], [0, 0, 0, 0]], # Original, rotate 270 degrees
         [[0, 0, 0, 0], [0, 2, 0, 0], [0, 0, 0, 0], [2, 0, 0, 0]]  # Hflip, rotated 270 degrees
         ], dtype=int)
     assert np.array_equal(td.get_x(), expected_x)
     assert np.array_equal(td.get_y_digit(), expected_y_digit)
     assert np.allclose(td.get_reward(), expected_reward)
     assert np.array_equal(td.get_next_x(), expected_next_x)
Code example #17
 def test_get_x_stacked(self):
     td = training_data.training_data()
     td.add(np.full([4, 4], 2), 0, 4, np.zeros([4, 4]))
     td.add(np.full([4, 4], 8), 1, 8, np.ones([4, 4]))
     td.add(np.full([4, 4], 2048), 1, 8, np.ones([4, 4]))
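     # Judging by the expected output, each tile value v is one-hot encoded
     # across 16 channels at index log2(v) - 1 (2 -> 0, 8 -> 2, 2048 -> 10)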
     expected_x_stacked = np.array(
         [[[[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
           [[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
           [[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
           [[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]],
          [[[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
           [[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
           [[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
           [[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]],
          [[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]],
           [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]],
           [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]],
           [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]]]],
         dtype=int)
     assert np.array_equal(td.get_x_stacked(), expected_x_stacked)
Code example #18
 def test_get_n(self):
     td = training_data.training_data()
     td.add(np.ones([4, 4]), 1, 4, np.zeros([4, 4]))
     td.add(np.zeros([4, 4]), 2, 8, np.ones([4, 4]))
     (state, action, reward, next_state, done) = td.get_n(1)
     self.assertTrue(np.array_equal(state, np.zeros([4, 4], dtype=int)))
     self.assertEqual(action, 2)
     self.assertAlmostEqual(reward, 8)
     self.assertTrue(
         np.array_equal(next_state, np.ones([4, 4], dtype=int)))
Code example #19
 def test_get_y_one_hot(self):
     td = training_data.training_data()
     td.add(np.ones([4, 4]), 0, 4, np.zeros([4, 4]))
     td.add(np.zeros([4, 4]), 1, 8, np.ones([4, 4]))
     td.add(np.zeros([4, 4]), 3, 8, np.ones([4, 4]))
     td.add(np.zeros([4, 4]), 2, 8, np.ones([4, 4]))
     expected_y_one_hot = np.array(
         [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 0, 1], [0, 0, 1, 0]],
         dtype=int)
     assert np.array_equal(td.get_y_one_hot(), expected_y_one_hot)
Code example #20
 def test_sample(self):
     td = training_data.training_data()
     td.add(np.zeros([1, 4, 4]), 0, 0, np.zeros([1, 4, 4]))
     td.add(np.ones([1, 4, 4]), 1, 1, np.ones([1, 4, 4]))
     sample = td.sample([1])
     assert sample.size() == 1
     assert sample.get_y_digit() in [[[0]], [[1]]]
     if sample.get_y_digit() == 0:
         assert np.array_equal(sample.get_x(), np.zeros([1, 4, 4]))
     if sample.get_y_digit() == 1:
         assert np.array_equal(sample.get_x(), np.ones([1, 4, 4]))
Code example #21
 def test_sample(self):
     td = training_data.training_data()
     td.add(np.zeros([1, 4, 4]), 0, 0, np.zeros([1, 4, 4]))
     td.add(np.ones([1, 4, 4]), 1, 1, np.ones([1, 4, 4]))
     sample = td.sample([1])
     self.assertEqual(sample.size(), 1)
     self.assertIn(sample.get_y_digit(), [[[0]], [[1]]])
     if sample.get_y_digit() == 0:
         self.assertTrue(np.array_equal(sample.get_x(), np.zeros([1, 4,
                                                                  4])))
     if sample.get_y_digit() == 1:
         self.assertTrue(np.array_equal(sample.get_x(), np.ones([1, 4, 4])))
Code example #22
 def test_add(self):
     td = training_data.training_data()
     assert np.array_equal(td.get_x(), np.empty([0, 4, 4], dtype=int))
     assert np.array_equal(td.get_y_digit(), np.empty([0, 1], dtype=int))
     assert np.allclose(td.get_reward(), np.empty([0, 1], dtype=float))
     assert np.array_equal(td.get_next_x(), np.empty([0, 4, 4], dtype=int))
     assert np.array_equal(td.get_done(), np.empty([0, 1], dtype=bool))
     td.add(np.ones([1, 4, 4]), 1, 4, np.zeros([1, 4, 4]), True)
     assert np.array_equal(td.get_x(), np.ones([1, 4, 4], dtype=int))
     assert np.array_equal(td.get_y_digit(), np.array([[1]], dtype=int))
     assert np.allclose(td.get_reward(), np.array([[4]], dtype=float))
     assert np.array_equal(td.get_next_x(), np.zeros([1, 4, 4], dtype=int))
     assert np.array_equal(td.get_done(), np.array([[1]], dtype=bool))
Code example #23
File: test_training_data.py  Project: tjussh/gym-2048
 def test_log2_rewards(self):
     # Set up training data
     td = training_data.training_data()
     td.add(np.ones([1, 4, 4]), 0, 0)
     td.add(np.ones([1, 4, 4]), 1, 2)
     td.add(np.ones([1, 4, 4]), 2, 4)
     td.add(np.ones([1, 4, 4]), 3, 16)
     td.add(np.ones([1, 4, 4]), 0, 75)
     td.add(np.ones([1, 4, 4]), 1, 2048)
     td.log2_rewards()
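     # Rewards become log2(reward); per the expected values below, a reward of 0
     # stays 0 and log2(75) ~= 6.2288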
     expected_reward = np.array([[0], [1], [2], [4], [6.2288], [11]],
                                 dtype=float)
     self.assertTrue(np.allclose(td.get_reward(), expected_reward))
     expected_action = np.array([[0], [1], [2], [3], [0], [1]],
                                 dtype=int)
     self.assertTrue(np.allclose(td.get_y_digit(), expected_action))
Code example #24
File: test_training_data.py  Project: tjussh/gym-2048
 def test_add(self):
     # Test add with reward
     td = training_data.training_data()
     self.assertTrue(
         np.array_equal(td.get_x(), np.empty([0, 4, 4], dtype=int)))
     self.assertTrue(
         np.array_equal(td.get_y_digit(), np.empty([0, 1], dtype=int)))
     self.assertTrue(
         np.allclose(td.get_reward(), np.empty([0, 1], dtype=float)))
     td.add(np.ones([1, 4, 4]), 1, 4)
     self.assertTrue(
         np.array_equal(td.get_x(), np.ones([1, 4, 4], dtype=int)))
     self.assertTrue(
         np.array_equal(td.get_y_digit(), np.array([[1]], dtype=int)))
     self.assertTrue(
         np.allclose(td.get_reward(), np.array([[4]], dtype=float)))
Code example #25
 def test_log2_rewards(self):
     # Set up training data
     td = training_data.training_data()
     td.add(np.ones([1, 4, 4]), 0, 0, np.zeros([1, 4, 4]))
     td.add(np.ones([1, 4, 4]), 1, 2, np.zeros([1, 4, 4]))
     td.add(np.ones([1, 4, 4]), 2, 4, np.zeros([1, 4, 4]))
     td.add(np.ones([1, 4, 4]), 3, 16, np.zeros([1, 4, 4]))
     td.add(np.ones([1, 4, 4]), 0, 75, np.zeros([1, 4, 4]))
     td.add(np.ones([1, 4, 4]), 1, 2048, np.zeros([1, 4, 4]))
     td.log2_rewards()
     expected_reward = np.array([
         [0], [1], [2], [4], [6.2288], [11]
         ], dtype=float)
     assert np.allclose(td.get_reward(), expected_reward)
     expected_action = np.array([
         [0], [1], [2], [3], [0], [1]
         ], dtype=int)
     assert np.allclose(td.get_y_digit(), expected_action)
Code example #26
    def test_shuffle(self):
        td = training_data.training_data()
        n = 5
        for i in range(n):
            # Use "is odd" for done
            td.add(np.full((1, 4, 4), i), i, i, np.full((1, 4, 4), i), (i % 2) == 1)
        td.shuffle()
        for i in range(n):
            # Find where this value has been shuffled to
            index_of_val = np.where(td.get_y_digit() == i)[0].item(0)

            # Check that all parts of this equal i
            arrays = td.get_n(index_of_val)
            for a in arrays:
                if a.dtype is np.dtype(bool):
                    assert((a == ((i % 2) == 1)).all())
                else:
                    assert((a == i).all())
Code example #27
 def test_make_boards_unique(self):
     td = training_data.training_data()
     td.add(np.ones([1, 4, 4]), 0, 4, np.zeros([1, 4, 4]))
     td.add(np.zeros([1, 4, 4]), 1, 2, np.ones([1, 4, 4]))
     td.add(np.ones([1, 4, 4]), 2, 16, np.zeros([1, 4, 4]))
     td.add(np.zeros([1, 4, 4]), 3, 2, np.ones([1, 4, 4]))
     td.make_boards_unique()
     expected_x = np.array(
         [[[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]],
          [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]],
         dtype=int)
     expected_y_digit = np.array([[0], [1]], dtype=int)
     expected_reward = np.array([[4], [2]], dtype=float)
     expected_next_x = np.array(
         [[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
          [[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]]],
         dtype=int)
     assert np.array_equal(td.get_x(), expected_x)
     assert np.array_equal(td.get_y_digit(), expected_y_digit)
     assert np.allclose(td.get_reward(), expected_reward)
     assert np.array_equal(td.get_next_x(), expected_next_x)
Code example #28
def gather_training_data(env, seed=None):
    """Gather training data by letting the user play the game"""
    # Data is a list of inputs and outputs
    data = training_data.training_data()
    # Initialise seed for environment
    if seed:
        env.seed(seed)
    else:
        env.seed()
    observation = env.reset()
    print("Use cursor keys to play, q to quit")
    try:
        while True:
            # Loop around performing moves
            action = None
            env.render()
            # Ask user for action
            # Read and discard the keyup event
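            # NOTE: the keyboard-reading code that sets 'action' is not shown in this excerpt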
            print("Read action {}".format(action))

            # Add this data to the data collection
            new_observation, reward, done, info = env.step(action)
            if np.array_equal(observation, new_observation):
                print("Suppressing recording of illegal move")
            else:
                data.add(observation, action, reward)
            observation = new_observation
            print()

            if done:
                # Draw final board
                env.render()
                print("End of game")
                break
    except Exiting:
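        # 'Exiting' is assumed to be a custom exception defined elsewhere in this project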
        print("Exiting...")

    return data
Code example #29
File: test_training_data.py  Project: tjussh/gym-2048
 def test_augment(self):
     td = training_data.training_data()
     initial_board = np.array([[1, 1, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0],
                               [0, 0, 0, 0]])
     td.add(initial_board, 1, 4)
     td.augment()
     self.assertEqual(td.size(), 8)
     expected_x = np.array(
         [[[1, 1, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
          [[0, 0, 1, 1], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
          [[0, 0, 0, 1], [0, 0, 0, 1], [0, 0, 0, 0], [0, 0, 0, 0]],
          [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 1], [0, 0, 0, 1]],
          [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 1]],
          [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 0, 0]],
          [[0, 0, 0, 0], [0, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0]],
          [[1, 0, 0, 0], [1, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]],
         dtype=int)
     expected_y_digit = np.array([[1], [3], [2], [0], [3], [1], [0], [2]],
                                  dtype=int)
     expected_reward = np.array([[4], [4], [4], [4], [4], [4], [4], [4]],
                                 dtype=float)
     self.assertTrue(np.array_equal(td.get_x(), expected_x))
     self.assertTrue(np.array_equal(td.get_y_digit(), expected_y_digit))
     self.assertTrue(np.allclose(td.get_reward(), expected_reward))
Code example #30
import cv2
import numpy as np
import face_detect as face_detect
import training_data as training_data

label = []


def predict(test_img):
    img = cv2.imread(test_img).copy()
    print("\n\n\n")
    print("Face Prediction Running -\\-")
    face, rect, length = face_detect.face_detect(test_img)
    print(len(face), "faces detected.")
    for i in range(0, len(face)):
        labeltemp, confidence = face_recognizer.predict(face[i])
        label.append(labeltemp)
    return img, label


faces, labels = training_data.training_data("training-data")
face_recognizer = cv2.face.LBPHFaceRecognizer_create()
face_recognizer.train(faces, np.array(labels))

# Read the test image.
test_img = "test-data/test.jpg"
predicted_img, label = predict(test_img)
cv2.destroyAllWindows()
cv2.waitKey(1)
cv2.destroyAllWindows()
print("Recognized faces = ", label)