def test_save_restore(self):
    # Set up training data
    td = training_data.training_data()
    td.add(np.ones([1, 4, 4]), 0, 4, np.zeros([1, 4, 4]))
    td.add(np.zeros([1, 4, 4]), 1, 2, np.ones([1, 4, 4]))
    td.add(np.ones([1, 4, 4]), 2, 16, np.zeros([1, 4, 4]))
    td.add(np.zeros([1, 4, 4]), 3, 2, np.ones([1, 4, 4]))

    temp_dir = tempfile.mkdtemp()
    temp_filename = os.path.join(temp_dir, 'data.csv')
    td.export_csv(temp_filename)

    td2 = training_data.training_data()
    td2.import_csv(temp_filename)

    expected_x = np.array(
        [[[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]],
         [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
         [[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]],
         [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]],
        dtype=int)
    expected_y_digit = np.array([[0], [1], [2], [3]], dtype=int)
    expected_reward = np.array([[4], [2], [16], [2]], dtype=float)
    expected_next_x = np.array(
        [[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
         [[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]],
         [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
         [[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]]],
        dtype=int)

    assert np.array_equal(td2.get_x(), expected_x)
    assert np.array_equal(td2.get_y_digit(), expected_y_digit)
    assert np.allclose(td2.get_reward(), expected_reward)
    assert np.array_equal(td2.get_next_x(), expected_next_x)

    os.remove(temp_filename)
    os.rmdir(temp_dir)

def test_smooth_rewards(self):
    # Set up training data
    td = training_data.training_data()
    td.add(np.ones([1, 4, 4]), 0, 4, np.zeros([1, 4, 4]))
    td.add(np.ones([1, 4, 4]), 1, 2, np.zeros([1, 4, 4]))
    td.add(np.ones([1, 4, 4]), 2, 16, np.zeros([1, 4, 4]))
    td.add(np.ones([1, 4, 4]), 3, 2, np.zeros([1, 4, 4]))

    # Test using default lambda value of 0.9
    td2 = td.copy()
    td2.smooth_rewards()
    expected_reward = np.array(
        [[20.218], [18.02], [17.8], [2.0]], dtype=float)
    self.assertTrue(np.allclose(td2.get_reward(), expected_reward))

    # Test using lambda value of 0, should have no effect on rewards
    td2 = td.copy()
    td2.smooth_rewards(llambda=0.0)
    expected_reward = np.array([[4], [2], [16], [2]], dtype=float)
    self.assertTrue(np.allclose(td2.get_reward(), expected_reward))

    # Test end of episode
    td3 = training_data.training_data()
    td3.add(np.ones([1, 4, 4]), 0, 4, np.zeros([1, 4, 4]), False)
    td3.add(np.ones([1, 4, 4]), 1, 2, np.zeros([1, 4, 4]), True)
    td3.add(np.ones([1, 4, 4]), 2, 16, np.zeros([1, 4, 4]), False)
    td3.add(np.ones([1, 4, 4]), 3, 2, np.zeros([1, 4, 4]), True)
    td3.smooth_rewards()
    expected_reward = np.array(
        [[5.8], [2.0], [17.8], [2.0]], dtype=float)
    self.assertTrue(np.allclose(td3.get_reward(), expected_reward))

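# A minimal sketch of the recurrence the assertions above imply, assuming
# smooth_rewards() replaces each reward with a lambda-discounted running
# return that resets at episode boundaries (the done flag). The names here
# are illustrative, not the library's actual internals.
def _smoothed_rewards_sketch(rewards, dones, llambda=0.9):
    smoothed = [0.0] * len(rewards)
    running = 0.0
    for i in reversed(range(len(rewards))):
        if dones[i]:
            running = 0.0  # a finished episode does not leak into earlier steps
        running = rewards[i] + llambda * running
        smoothed[i] = running
    return smoothed

# _smoothed_rewards_sketch([4, 2, 16, 2], [False] * 4) gives
# [20.218, 18.02, 17.8, 2.0], matching the expected values above.
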
def test_save_restore(self):
    # Set up training data
    td = training_data.training_data()
    td.add(np.ones([1, 4, 4]), 0, 4)
    td.add(np.zeros([1, 4, 4]), 1, 2)
    td.add(np.ones([1, 4, 4]), 2, 16)
    td.add(np.zeros([1, 4, 4]), 3, 2)

    f = tempfile.NamedTemporaryFile()
    td.export_csv(f.name)

    td2 = training_data.training_data()
    td2.import_csv(f.name)

    expected_x = np.array(
        [[[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]],
         [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
         [[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]],
         [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]],
        dtype=int)
    expected_y_digit = np.array([[0], [1], [2], [3]], dtype=int)
    expected_reward = np.array([[4], [2], [16], [2]], dtype=float)

    self.assertTrue(np.array_equal(td2.get_x(), expected_x))
    self.assertTrue(np.array_equal(td2.get_y_digit(), expected_y_digit))
    self.assertTrue(np.allclose(td2.get_reward(), expected_reward))

    os.remove(f.name)

def test_normalize_rewards(self):
    # Test calculating mean and standard deviation
    td = training_data.training_data()
    td.add(np.ones([1, 4, 4]), 1, 4, np.zeros([1, 4, 4]))
    td.add(np.ones([1, 4, 4]), 2, 4, np.zeros([1, 4, 4]))
    td.add(np.ones([1, 4, 4]), 3, 8, np.zeros([1, 4, 4]))
    td.add(np.ones([1, 4, 4]), 0, 16, np.zeros([1, 4, 4]))
    td.normalize_rewards()
    expected_reward = np.array([
        [-0.8165],
        [-0.8165],
        [0.],
        [1.633],
    ], dtype=float)
    self.assertTrue(np.allclose(td.get_reward(), expected_reward))

    # Test specifying mean and standard deviation
    td = training_data.training_data()
    td.add(np.ones([1, 4, 4]), 1, 4, np.zeros([1, 4, 4]))
    td.add(np.ones([1, 4, 4]), 2, 4, np.zeros([1, 4, 4]))
    td.add(np.ones([1, 4, 4]), 3, 8, np.zeros([1, 4, 4]))
    td.add(np.ones([1, 4, 4]), 0, 16, np.zeros([1, 4, 4]))
    td.normalize_rewards(mean=8, sd=1)
    expected_reward = np.array([
        [-4.],
        [-4.],
        [0.],
        [8.],
    ], dtype=float)
    self.assertTrue(np.allclose(td.get_reward(), expected_reward))

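# A hedged sketch consistent with both assertions above: rewards are
# standardised to (r - mean) / sd, defaulting to the sample's own population
# statistics (np.std's default). For [4, 4, 8, 16]: mean = 8,
# sd = sqrt(24) ~= 4.899, so (4 - 8) / 4.899 ~= -0.8165 and
# (16 - 8) / 4.899 ~= 1.633, as expected above.
def _normalize_rewards_sketch(rewards, mean=None, sd=None):
    rewards = np.asarray(rewards, dtype=float)
    if mean is None:
        mean = rewards.mean()
    if sd is None:
        sd = rewards.std()  # population standard deviation
    return (rewards - mean) / sd
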
def test_merge(self):
    td = training_data.training_data()
    td.add(np.ones([1, 4, 4]), 1, 16, np.zeros([1, 4, 4]))
    td2 = training_data.training_data()
    td2.add(np.zeros([1, 4, 4]), 2, 0, np.ones([1, 4, 4]))
    td.merge(td2)

    expected_x = np.array([
        [[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]],
        [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]
    ], dtype=int)
    expected_y_digit = np.array([
        [1],
        [2]
    ], dtype=int)
    expected_reward = np.array([
        [16],
        [0]
    ], dtype=float)
    expected_next_x = np.array([
        [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
        [[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]]
    ], dtype=int)

    assert np.array_equal(td.get_x(), expected_x)
    assert np.array_equal(td.get_y_digit(), expected_y_digit)
    assert np.allclose(td.get_reward(), expected_reward)
    assert np.array_equal(td.get_next_x(), expected_next_x)

def test_get_discounted_return(self):
    # Set up training data
    td = training_data.training_data()
    td.add(np.ones([1, 4, 4]), 0, 4, np.zeros([1, 4, 4]))
    td.add(np.ones([1, 4, 4]), 1, 2, np.zeros([1, 4, 4]))
    td.add(np.ones([1, 4, 4]), 2, 16, np.zeros([1, 4, 4]))
    td.add(np.ones([1, 4, 4]), 3, 2, np.zeros([1, 4, 4]))

    # Test using default gamma value of 0.9
    td2 = td.copy()
    discounted_return = td2.get_discounted_return()
    expected_return = np.array(
        [[20.218], [18.02], [17.8], [2.0]], dtype=float)
    assert np.allclose(discounted_return, expected_return)

    # Test using gamma value of 0, should have no effect on rewards
    td2 = td.copy()
    discounted_return = td2.get_discounted_return(gamma=0.0)
    expected_return = np.array([[4], [2], [16], [2]], dtype=float)
    assert np.allclose(discounted_return, expected_return)

    # Test end of episode
    td3 = training_data.training_data()
    td3.add(np.ones([1, 4, 4]), 0, 4, np.zeros([1, 4, 4]), False)
    td3.add(np.ones([1, 4, 4]), 1, 2, np.zeros([1, 4, 4]), True)
    td3.add(np.ones([1, 4, 4]), 2, 16, np.zeros([1, 4, 4]), False)
    td3.add(np.ones([1, 4, 4]), 3, 2, np.zeros([1, 4, 4]), True)
    discounted_return = td3.get_discounted_return()
    expected_return = np.array(
        [[5.8], [2.0], [17.8], [2.0]], dtype=float)
    assert np.allclose(discounted_return, expected_return)

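# get_discounted_return() appears to compute the same backward recurrence as
# smooth_rewards above, returning the result rather than overwriting the
# stored rewards. The default-gamma expectations unwind by hand as:
#   G3 = 2
#   G2 = 16 + 0.9 * 2     = 17.8
#   G1 = 2  + 0.9 * 17.8  = 18.02
#   G0 = 4  + 0.9 * 18.02 = 20.218
# and a done flag simply restarts the recurrence at that step, giving
# G0 = 4 + 0.9 * 2 = 5.8 in the end-of-episode case.
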
def test_split(self):
    td = training_data.training_data()
    td.add(np.ones([1, 4, 4]), 1, 16)
    td2 = training_data.training_data()
    td2.add(np.zeros([1, 4, 4]), 2, 0)
    td.merge(td2)
    a, b = td.split()
    self.assertTrue(np.array_equal(a.get_x(), np.ones([1, 4, 4])))
    self.assertTrue(np.array_equal(a.get_y_digit(), [[1]]))
    self.assertTrue(np.array_equal(b.get_x(), np.zeros([1, 4, 4])))
    self.assertTrue(np.array_equal(b.get_y_digit(), [[2]]))

def test_split(self):
    td = training_data.training_data()
    td.add(np.ones([1, 4, 4]), 1, 16, np.zeros([1, 4, 4]))
    td2 = training_data.training_data()
    td2.add(np.zeros([1, 4, 4]), 2, 0, np.ones([1, 4, 4]))
    td.merge(td2)
    a, b = td.split()
    assert np.array_equal(a.get_x(), np.ones([1, 4, 4]))
    assert np.array_equal(a.get_y_digit(), [[1]])
    assert np.array_equal(a.get_reward(), [[16]])
    assert np.array_equal(a.get_next_x(), np.zeros([1, 4, 4]))
    assert np.array_equal(b.get_x(), np.zeros([1, 4, 4]))
    assert np.array_equal(b.get_y_digit(), [[2]])
    assert np.array_equal(b.get_reward(), [[0]])
    assert np.array_equal(b.get_next_x(), np.ones([1, 4, 4]))

def add_rewards_to_training_data(env, input_training_data):
    new_training_data = training_data.training_data()
    for n in range(input_training_data.size()):
        (state, action) = input_training_data.get_n(n)
        reward = get_reward_for_state_action(env, state, action)
        new_training_data.add(state, action, reward)
    return new_training_data

def test_hflip(self):
    td = training_data.training_data()
    board1 = np.array([[1, 1, 0, 0],
                       [0, 0, 0, 0],
                       [0, 0, 0, 0],
                       [0, 0, 0, 0]])
    board2 = np.array([[0, 0, 0, 0],
                       [2, 4, 0, 0],
                       [0, 0, 0, 0],
                       [0, 0, 0, 0]])
    td.add(board1, 1, 2, board2)
    td.add(board2, 2, 0, board1)
    td.hflip()

    expected_x = np.array(
        [[[0, 0, 1, 1], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
         [[0, 0, 0, 0], [0, 0, 4, 2], [0, 0, 0, 0], [0, 0, 0, 0]]],
        dtype=int)
    expected_y_digit = np.array([[3], [2]], dtype=int)
    expected_reward = np.array([
        [2],
        [0],
    ], dtype=float)
    expected_next_x = np.array(
        [[[0, 0, 0, 0], [0, 0, 4, 2], [0, 0, 0, 0], [0, 0, 0, 0]],
         [[0, 0, 1, 1], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]],
        dtype=int)

    assert np.array_equal(td.get_x(), expected_x)
    assert np.array_equal(td.get_y_digit(), expected_y_digit)
    assert np.allclose(td.get_reward(), expected_reward)
    assert np.allclose(td.get_next_x(), expected_next_x)

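# A sketch of the transformation test_hflip expects, assuming actions are
# encoded 0=up, 1=right, 2=down, 3=left, so a horizontal flip swaps 1 and 3
# and leaves 0 and 2 alone; the exact encoding is an assumption here.
def _hflip_sketch(boards, actions):
    flipped = np.flip(boards, axis=-1)      # mirror each board left-right
    remapped = np.array([0, 3, 2, 1])[actions]  # swap the horizontal actions
    return flipped, remapped
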
def test_normalize_boards(self):
    # Test calculating mean and standard deviation
    td = training_data.training_data()
    td.add(np.full((1, 4, 4), 4), 1, 4, np.full((1, 4, 4), 8))
    td.add(np.full((1, 4, 4), 8), 2, 4, np.full((1, 4, 4), 16))
    td.add(np.full((1, 4, 4), 16), 3, 4, np.full((1, 4, 4), 32))
    td.add(np.full((1, 4, 4), 32), 4, 4, np.full((1, 4, 4), 64))
    td.normalize_boards()
    mean = 15.
    sd = 10.7238052947636
    a = (4. - mean) / sd
    b = (8. - mean) / sd
    c = (16. - mean) / sd
    d = (32. - mean) / sd
    e = (64. - mean) / sd
    expected_x = np.array(
        [[[a, a, a, a], [a, a, a, a], [a, a, a, a], [a, a, a, a]],
         [[b, b, b, b], [b, b, b, b], [b, b, b, b], [b, b, b, b]],
         [[c, c, c, c], [c, c, c, c], [c, c, c, c], [c, c, c, c]],
         [[d, d, d, d], [d, d, d, d], [d, d, d, d], [d, d, d, d]]],
        dtype=float)
    assert np.allclose(td.get_x(), expected_x)
    expected_next_x = np.array(
        [[[b, b, b, b], [b, b, b, b], [b, b, b, b], [b, b, b, b]],
         [[c, c, c, c], [c, c, c, c], [c, c, c, c], [c, c, c, c]],
         [[d, d, d, d], [d, d, d, d], [d, d, d, d], [d, d, d, d]],
         [[e, e, e, e], [e, e, e, e], [e, e, e, e], [e, e, e, e]]],
        dtype=float)
    assert np.allclose(td.get_next_x(), expected_next_x)

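# A hedged sketch of normalize_boards() matching the numbers above: the mean
# and sd come from get_x() alone (mean of {4, 8, 16, 32} is 15, population
# sd ~= 10.7238) and the same statistics are then applied to next_x, which is
# why e = (64 - mean) / sd uses statistics that never saw the value 64.
def _normalize_boards_sketch(x, next_x):
    mean, sd = x.mean(), x.std()
    return (x - mean) / sd, (next_x - mean) / sd
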
def test_rotate(self):
    td = training_data.training_data()
    board1 = np.array([[1, 1, 0, 0],
                       [0, 0, 0, 0],
                       [0, 0, 0, 0],
                       [0, 0, 0, 0]])
    board2 = np.array([[0, 0, 0, 0],
                       [2, 4, 0, 0],
                       [0, 0, 0, 0],
                       [0, 0, 0, 0]])
    td.add(board1, 1, 2, board2)
    td.add(board2, 2, 0, board1)
    td.rotate(3)

    expected_x = np.array(
        [[[0, 0, 0, 0], [0, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0]],
         [[0, 0, 0, 0], [0, 0, 0, 0], [0, 4, 0, 0], [0, 2, 0, 0]]],
        dtype=int)
    expected_y_digit = np.array([[0], [1]], dtype=int)
    expected_reward = np.array([
        [2],
        [0],
    ], dtype=float)
    expected_next_x = np.array(
        [[[0, 0, 0, 0], [0, 0, 0, 0], [0, 4, 0, 0], [0, 2, 0, 0]],
         [[0, 0, 0, 0], [0, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0]]],
        dtype=int)

    self.assertTrue(np.array_equal(td.get_x(), expected_x))
    self.assertTrue(np.array_equal(td.get_y_digit(), expected_y_digit))
    self.assertTrue(np.allclose(td.get_reward(), expected_reward))
    self.assertTrue(np.array_equal(td.get_next_x(), expected_next_x))

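# A sketch consistent with test_rotate, assuming rotate(k) turns each board
# k quarter-turns clockwise and advances the action index by k (mod 4).
# np.rot90 rotates counter-clockwise, hence the negated k; with k=3 the
# actions above map 1 -> (1 + 3) % 4 = 0 and 2 -> (2 + 3) % 4 = 1.
def _rotate_sketch(boards, actions, k):
    rotated = np.rot90(boards, k=-k, axes=(-2, -1))
    return rotated, (actions + k) % 4
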
def test_get_total_reward(self):
    td = training_data.training_data()
    td.add(np.ones([4, 4]), 0, 4, np.zeros([4, 4]))
    td.add(np.zeros([4, 4]), 1, 8, np.ones([4, 4]))
    td.add(np.zeros([4, 4]), 3, 16, np.ones([4, 4]))
    td.add(np.zeros([4, 4]), 2, 32, np.ones([4, 4]))
    assert td.get_total_reward() == 60

def test_get_n(self):
    td = training_data.training_data()
    td.add(np.ones([4, 4]), 1, 4, np.zeros([4, 4]))
    td.add(np.zeros([4, 4]), 2, 8, np.ones([4, 4]))
    (state, action, reward, next_state, done) = td.get_n(1)
    assert np.array_equal(state, np.zeros([4, 4], dtype=int))
    assert action == 2
    assert reward == pytest.approx(8.)
    assert np.array_equal(next_state, np.ones([4, 4], dtype=int))

def test_get_n(self):
    # Test get_n with reward
    td = training_data.training_data()
    td.add(np.ones([4, 4]), 1, 4)
    td.add(np.zeros([4, 4]), 2, 8)
    (state, action, reward) = td.get_n(1)
    self.assertTrue(np.array_equal(state, np.zeros([4, 4], dtype=int)))
    self.assertEqual(action, 2)
    self.assertAlmostEqual(reward, 8)

def test_augment(self):
    td = training_data.training_data()
    initial_board = np.array([[1, 1, 0, 0],
                              [0, 0, 0, 0],
                              [0, 0, 0, 0],
                              [0, 0, 0, 0]])
    next_board = np.array([[0, 0, 0, 2],
                           [0, 2, 0, 0],
                           [0, 0, 0, 0],
                           [0, 0, 0, 0]])
    td.add(initial_board, 1, 4, next_board)
    td.augment()
    assert td.size() == 8

    expected_x = np.array([
        [[1, 1, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
        [[0, 0, 1, 1], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
        [[0, 0, 0, 1], [0, 0, 0, 1], [0, 0, 0, 0], [0, 0, 0, 0]],
        [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 1], [0, 0, 0, 1]],
        [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 1]],
        [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 0, 0]],
        [[0, 0, 0, 0], [0, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0]],
        [[1, 0, 0, 0], [1, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]
    ], dtype=int)
    expected_y_digit = np.array([
        [1],
        [3],
        [2],
        [0],
        [3],
        [1],
        [0],
        [2]
    ], dtype=int)
    expected_reward = np.array([
        [4],
        [4],
        [4],
        [4],
        [4],
        [4],
        [4],
        [4]
    ], dtype=float)
    expected_next_x = np.array([
        [[0, 0, 0, 2], [0, 2, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],  # Original
        [[2, 0, 0, 0], [0, 0, 2, 0], [0, 0, 0, 0], [0, 0, 0, 0]],  # Hflip'd
        [[0, 0, 0, 0], [0, 0, 2, 0], [0, 0, 0, 0], [0, 0, 0, 2]],  # Original, rotated 90 degrees
        [[0, 0, 0, 2], [0, 0, 0, 0], [0, 0, 2, 0], [0, 0, 0, 0]],  # Hflip, rotated 90 degrees
        [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 2, 0], [2, 0, 0, 0]],  # Original, rotated 180 degrees
        [[0, 0, 0, 0], [0, 0, 0, 0], [0, 2, 0, 0], [0, 0, 0, 2]],  # Hflip, rotated 180 degrees
        [[2, 0, 0, 0], [0, 0, 0, 0], [0, 2, 0, 0], [0, 0, 0, 0]],  # Original, rotated 270 degrees
        [[0, 0, 0, 0], [0, 2, 0, 0], [0, 0, 0, 0], [2, 0, 0, 0]]   # Hflip, rotated 270 degrees
    ], dtype=int)

    assert np.array_equal(td.get_x(), expected_x)
    assert np.array_equal(td.get_y_digit(), expected_y_digit)
    assert np.allclose(td.get_reward(), expected_reward)
    assert np.array_equal(td.get_next_x(), expected_next_x)

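# A sketch of the augmentation scheme the expectations above encode: each
# sample expands to the 8 elements of the square's symmetry group, i.e. the
# original and its horizontal flip, each rotated 0/90/180/270 degrees, with
# actions remapped to match. Built here from the class's own copy/hflip/
# rotate/merge methods; the real implementation may differ in detail.
def _augment_sketch(td):
    out = training_data.training_data()
    for k in range(4):
        for flip in (False, True):
            variant = td.copy()
            if flip:
                variant.hflip()
            if k:
                variant.rotate(k)
            out.merge(variant)
    return out
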
def test_get_x_stacked(self):
    td = training_data.training_data()
    td.add(np.full([4, 4], 2), 0, 4, np.zeros([4, 4]))
    td.add(np.full([4, 4], 8), 1, 8, np.ones([4, 4]))
    td.add(np.full([4, 4], 2048), 1, 8, np.ones([4, 4]))
    expected_x_stacked = np.array(
        [[[[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
          [[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
          [[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
          [[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]],
         [[[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
          [[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
          [[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
          [[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]],
         [[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]],
          [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]],
          [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]],
          [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]]]],
        dtype=int)
    assert np.array_equal(td.get_x_stacked(), expected_x_stacked)

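# A hedged sketch of the channel layout the test above asserts: each board
# becomes a (4, 4, 16) one-hot stack where a tile of value 2**(c + 1) lights
# channel c (2 -> channel 0, 8 -> channel 2, 2048 -> channel 10). How empty
# (zero) cells are encoded is not pinned down by this test.
def _stack_board_sketch(board):
    stacked = np.zeros(board.shape + (16,), dtype=int)
    for value in np.unique(board[board > 0]):
        channel = int(np.log2(value)) - 1
        stacked[board == value, channel] = 1
    return stacked
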
def test_get_n(self):
    td = training_data.training_data()
    td.add(np.ones([4, 4]), 1, 4, np.zeros([4, 4]))
    td.add(np.zeros([4, 4]), 2, 8, np.ones([4, 4]))
    (state, action, reward, next_state, done) = td.get_n(1)
    self.assertTrue(np.array_equal(state, np.zeros([4, 4], dtype=int)))
    self.assertEqual(action, 2)
    self.assertAlmostEqual(reward, 8)
    self.assertTrue(
        np.array_equal(next_state, np.ones([4, 4], dtype=int)))

def test_get_y_one_hot(self):
    td = training_data.training_data()
    td.add(np.ones([4, 4]), 0, 4, np.zeros([4, 4]))
    td.add(np.zeros([4, 4]), 1, 8, np.ones([4, 4]))
    td.add(np.zeros([4, 4]), 3, 8, np.ones([4, 4]))
    td.add(np.zeros([4, 4]), 2, 8, np.ones([4, 4]))
    expected_y_one_hot = np.array(
        [[1, 0, 0, 0],
         [0, 1, 0, 0],
         [0, 0, 0, 1],
         [0, 0, 1, 0]],
        dtype=int)
    assert np.array_equal(td.get_y_one_hot(), expected_y_one_hot)

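# The expected matrix above is a plain one-hot encoding of the action column.
# A one-line sketch of the idea (an identity-matrix row lookup); the helper
# name is illustrative:
def _one_hot_sketch(y_digit, n_actions=4):
    return np.eye(n_actions, dtype=int)[np.asarray(y_digit).reshape(-1)]
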
def test_sample(self):
    td = training_data.training_data()
    td.add(np.zeros([1, 4, 4]), 0, 0, np.zeros([1, 4, 4]))
    td.add(np.ones([1, 4, 4]), 1, 1, np.ones([1, 4, 4]))
    sample = td.sample([1])
    assert sample.size() == 1
    assert sample.get_y_digit() in [[[0]], [[1]]]
    if sample.get_y_digit() == 0:
        assert np.array_equal(sample.get_x(), np.zeros([1, 4, 4]))
    if sample.get_y_digit() == 1:
        assert np.array_equal(sample.get_x(), np.ones([1, 4, 4]))

def test_sample(self):
    td = training_data.training_data()
    td.add(np.zeros([1, 4, 4]), 0, 0, np.zeros([1, 4, 4]))
    td.add(np.ones([1, 4, 4]), 1, 1, np.ones([1, 4, 4]))
    sample = td.sample([1])
    self.assertEqual(sample.size(), 1)
    self.assertIn(sample.get_y_digit(), [[[0]], [[1]]])
    if sample.get_y_digit() == 0:
        self.assertTrue(
            np.array_equal(sample.get_x(), np.zeros([1, 4, 4])))
    if sample.get_y_digit() == 1:
        self.assertTrue(
            np.array_equal(sample.get_x(), np.ones([1, 4, 4])))

def test_add(self):
    td = training_data.training_data()
    assert np.array_equal(td.get_x(), np.empty([0, 4, 4], dtype=int))
    assert np.array_equal(td.get_y_digit(), np.empty([0, 1], dtype=int))
    assert np.allclose(td.get_reward(), np.empty([0, 1], dtype=float))
    assert np.array_equal(td.get_next_x(), np.empty([0, 4, 4], dtype=int))
    assert np.array_equal(td.get_done(), np.empty([0, 1], dtype=bool))
    td.add(np.ones([1, 4, 4]), 1, 4, np.zeros([1, 4, 4]), True)
    assert np.array_equal(td.get_x(), np.ones([1, 4, 4], dtype=int))
    assert np.array_equal(td.get_y_digit(), np.array([[1]], dtype=int))
    assert np.allclose(td.get_reward(), np.array([[4]], dtype=float))
    assert np.array_equal(td.get_next_x(), np.zeros([1, 4, 4], dtype=int))
    assert np.array_equal(td.get_done(), np.array([[1]], dtype=bool))

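# A sketch of the storage model the empty-state assertions above imply: five
# parallel arrays grown by concatenation, each keeping a leading sample axis.
# The attribute names are hypothetical; only the shapes are grounded in the
# test.
def _add_sketch(self, x, y_digit, reward, next_x, done=False):
    self._x = np.concatenate([self._x, np.reshape(x, (1, 4, 4))])
    self._y_digit = np.concatenate([self._y_digit, [[y_digit]]])
    self._reward = np.concatenate([self._reward, [[reward]]])
    self._next_x = np.concatenate([self._next_x, np.reshape(next_x, (1, 4, 4))])
    self._done = np.concatenate([self._done, np.array([[done]], dtype=bool)])
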
def test_log2_rewards(self):
    # Set up training data
    td = training_data.training_data()
    td.add(np.ones([1, 4, 4]), 0, 0)
    td.add(np.ones([1, 4, 4]), 1, 2)
    td.add(np.ones([1, 4, 4]), 2, 4)
    td.add(np.ones([1, 4, 4]), 3, 16)
    td.add(np.ones([1, 4, 4]), 0, 75)
    td.add(np.ones([1, 4, 4]), 1, 2048)
    td.log2_rewards()
    expected_reward = np.array(
        [[0], [1], [2], [4], [6.2288], [11]], dtype=float)
    self.assertTrue(np.allclose(td.get_reward(), expected_reward))
    expected_action = np.array(
        [[0], [1], [2], [3], [0], [1]], dtype=int)
    self.assertTrue(np.allclose(td.get_y_digit(), expected_action))

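# A sketch consistent with the expectations above, assuming log2_rewards()
# takes log2 of each reward but leaves zeros at zero: plain np.log2(0) would
# give -inf, yet the first expected reward is 0. The inner np.where avoids
# evaluating log2 at zero.
def _log2_rewards_sketch(rewards):
    rewards = np.asarray(rewards, dtype=float)
    safe = np.where(rewards > 0, rewards, 1)
    return np.where(rewards > 0, np.log2(safe), 0.0)

# _log2_rewards_sketch([0, 2, 4, 16, 75, 2048]) gives
# [0, 1, 2, 4, 6.2288, 11], matching the expected values above.
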
def test_add(self):
    # Test add with reward
    td = training_data.training_data()
    self.assertTrue(
        np.array_equal(td.get_x(), np.empty([0, 4, 4], dtype=int)))
    self.assertTrue(
        np.array_equal(td.get_y_digit(), np.empty([0, 1], dtype=int)))
    self.assertTrue(
        np.allclose(td.get_reward(), np.empty([0, 1], dtype=float)))
    td.add(np.ones([1, 4, 4]), 1, 4)
    self.assertTrue(
        np.array_equal(td.get_x(), np.ones([1, 4, 4], dtype=int)))
    self.assertTrue(
        np.array_equal(td.get_y_digit(), np.array([[1]], dtype=int)))
    self.assertTrue(
        np.allclose(td.get_reward(), np.array([[4]], dtype=float)))

def test_log2_rewards(self):
    # Set up training data
    td = training_data.training_data()
    td.add(np.ones([1, 4, 4]), 0, 0, np.zeros([1, 4, 4]))
    td.add(np.ones([1, 4, 4]), 1, 2, np.zeros([1, 4, 4]))
    td.add(np.ones([1, 4, 4]), 2, 4, np.zeros([1, 4, 4]))
    td.add(np.ones([1, 4, 4]), 3, 16, np.zeros([1, 4, 4]))
    td.add(np.ones([1, 4, 4]), 0, 75, np.zeros([1, 4, 4]))
    td.add(np.ones([1, 4, 4]), 1, 2048, np.zeros([1, 4, 4]))
    td.log2_rewards()
    expected_reward = np.array([
        [0], [1], [2], [4], [6.2288], [11]
    ], dtype=float)
    assert np.allclose(td.get_reward(), expected_reward)
    expected_action = np.array([
        [0], [1], [2], [3], [0], [1]
    ], dtype=int)
    assert np.allclose(td.get_y_digit(), expected_action)

def test_shuffle(self):
    td = training_data.training_data()
    n = 5
    for i in range(n):
        # Use "is odd" for done
        td.add(np.full((1, 4, 4), i), i, i, np.full((1, 4, 4), i),
               (i % 2) == 1)
    td.shuffle()
    for i in range(n):
        # Find where this item has been shuffled to
        index_of_val = np.where(td.get_y_digit() == i)[0].item(0)

        # Check that all parts of this item equal i
        arrays = td.get_n(index_of_val)
        for a in arrays:
            if a.dtype is np.dtype(bool):
                assert (a == ((i % 2) == 1)).all()
            else:
                assert (a == i).all()

def test_make_boards_unique(self):
    td = training_data.training_data()
    td.add(np.ones([1, 4, 4]), 0, 4, np.zeros([1, 4, 4]))
    td.add(np.zeros([1, 4, 4]), 1, 2, np.ones([1, 4, 4]))
    td.add(np.ones([1, 4, 4]), 2, 16, np.zeros([1, 4, 4]))
    td.add(np.zeros([1, 4, 4]), 3, 2, np.ones([1, 4, 4]))
    td.make_boards_unique()

    expected_x = np.array(
        [[[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]],
         [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]],
        dtype=int)
    expected_y_digit = np.array([[0], [1]], dtype=int)
    expected_reward = np.array([[4], [2]], dtype=float)
    expected_next_x = np.array(
        [[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
         [[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]]],
        dtype=int)

    assert np.array_equal(td.get_x(), expected_x)
    assert np.array_equal(td.get_y_digit(), expected_y_digit)
    assert np.allclose(td.get_reward(), expected_reward)
    assert np.array_equal(td.get_next_x(), expected_next_x)

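# A sketch of the deduplication test_make_boards_unique expects: keep only
# the first sample seen for each distinct board, preserving order (the four
# samples above collapse to the first two). Hedged; the real method may key
# on something richer than the raw bytes used here.
def _unique_board_indices_sketch(boards):
    seen = set()
    keep = []
    for i, board in enumerate(boards):
        key = board.tobytes()
        if key not in seen:
            seen.add(key)
            keep.append(i)
    return keep  # indices of the rows to retain
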
def gather_training_data(env, seed=None):
    """Gather training data from letting the user play the game"""
    # Data is a list of inputs and outputs
    data = training_data.training_data()

    # Initialise seed for environment
    if seed:
        env.seed(seed)
    else:
        env.seed()
    observation = env.reset()
    print("Use cursor keys to play, q to quit")
    try:
        while True:
            # Loop around performing moves
            action = None
            env.render()

            # Ask user for action
            # Read and discard the keyup event
            print("Read action {}".format(action))

            # Add this data to the data collection
            new_observation, reward, done, info = env.step(action)
            if np.array_equal(observation, new_observation):
                print("Suppressing recording of illegal move")
            else:
                data.add(observation, action, reward)
            observation = new_observation
            print()

            if done:
                # Draw final board
                env.render()
                print("End of game")
                break
    except Exiting:
        print("Exiting...")
    return data

def test_augment(self):
    td = training_data.training_data()
    initial_board = np.array([[1, 1, 0, 0],
                              [0, 0, 0, 0],
                              [0, 0, 0, 0],
                              [0, 0, 0, 0]])
    td.add(initial_board, 1, 4)
    td.augment()
    self.assertEqual(td.size(), 8)
    expected_x = np.array(
        [[[1, 1, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
         [[0, 0, 1, 1], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
         [[0, 0, 0, 1], [0, 0, 0, 1], [0, 0, 0, 0], [0, 0, 0, 0]],
         [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 1], [0, 0, 0, 1]],
         [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 1]],
         [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 0, 0]],
         [[0, 0, 0, 0], [0, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0]],
         [[1, 0, 0, 0], [1, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]],
        dtype=int)
    expected_y_digit = np.array(
        [[1], [3], [2], [0], [3], [1], [0], [2]], dtype=int)
    expected_reward = np.array(
        [[4], [4], [4], [4], [4], [4], [4], [4]], dtype=float)
    self.assertTrue(np.array_equal(td.get_x(), expected_x))
    self.assertTrue(np.array_equal(td.get_y_digit(), expected_y_digit))
    self.assertTrue(np.allclose(td.get_reward(), expected_reward))

import cv2
import numpy as np

import face_detect as face_detect
import training_data as training_data

label = []


def predict(test_img):
    img = cv2.imread(test_img).copy()
    print("\n\n\n")
    print("Face Prediction Running -\\-")
    face, rect, length = face_detect.face_detect(test_img)
    print(len(face), "faces detected.")
    for i in range(0, len(face)):
        labeltemp, confidence = face_recognizer.predict(face[i])
        label.append(labeltemp)
    return img, label


faces, labels = training_data.training_data("training-data")
face_recognizer = cv2.face.LBPHFaceRecognizer_create()
face_recognizer.train(faces, np.array(labels))

# Read the test image.
test_img = "test-data/test.jpg"
predicted_img, label = predict(test_img)

cv2.destroyAllWindows()
cv2.waitKey(1)
cv2.destroyAllWindows()

print("Recognized faces =", label)