def test_pytorch_approximator():
    """Exercise ``TorchApproximator`` through the ``Regressor`` wrapper.

    Fits the approximator on seeded random data, compares predictions and
    gradients against hard-coded reference values, and checks that weights
    survive a ``get_weights`` / ``set_weights`` round trip.
    """
    # The hard-coded reference values are tied to these seeds and to the
    # order of the random draws below, so the draw order must not change.
    np.random.seed(1)
    torch.manual_seed(1)

    n_actions = 2
    n_samples = 1000
    states = np.random.rand(n_samples, 4)
    actions = np.random.randint(n_actions, size=(n_samples, 1))
    targets = np.random.rand(n_samples)

    approximator = Regressor(
        TorchApproximator,
        input_shape=(4,),
        output_shape=(2,),
        n_actions=n_actions,
        network=ExampleNet,
        optimizer={'class': optim.Adam, 'params': {}},
        loss=F.mse_loss,
        batch_size=100,
        quiet=True,
    )
    approximator.fit(states, actions, targets, n_epochs=20)

    query_states = np.random.rand(2, 4)
    query_actions = np.random.randint(n_actions, size=(2, 1))

    # Prediction for the given (state, action) pairs.
    expected_q_sa = np.array([0.37191153, 0.5920861])
    predicted_q_sa = approximator.predict(query_states, query_actions)
    assert np.allclose(predicted_q_sa, expected_q_sa)

    # Prediction for every action of each state.
    expected_q_all = np.array([
        [0.47908658, 0.37191153],
        [0.5920861, 0.27575058],
    ])
    predicted_q_all = approximator.predict(query_states)
    assert np.allclose(predicted_q_all, expected_q_all)

    # Gradient w.r.t. the weights for a single (state, action) pair.
    expected_grad_sa = np.array([
        0., 0., 0., 0.,
        0.02627479, 0.76513696, 0.6672573, 0.35979462,
        0., 1.,
    ])
    grad_sa = approximator.diff(query_states[0], query_actions[0])
    assert np.allclose(grad_sa, expected_grad_sa)

    # Gradient w.r.t. the weights for every output of a single state.
    expected_grad_all = np.array([
        [0.02627479, 0.],
        [0.76513696, 0.],
        [0.6672573, 0.],
        [0.35979462, 0.],
        [0., 0.02627479],
        [0., 0.76513696],
        [0., 0.6672573],
        [0., 0.35979462],
        [1., 0.],
        [0., 1.],
    ])
    grad_all = approximator.diff(query_states[0])
    assert np.allclose(grad_all, expected_grad_all)

    # Writing back the current weights must leave them untouched.
    weights_before = approximator.get_weights()
    approximator.set_weights(weights_before)
    weights_after = approximator.get_weights()
    assert np.array_equal(weights_after, weights_before)

    # Freshly drawn weights must be stored verbatim and differ everywhere
    # from the fitted ones.
    drawn_weights = np.random.randn(*weights_before.shape).astype(np.float32)
    approximator.set_weights(drawn_weights)
    stored_weights = approximator.get_weights()
    assert np.array_equal(drawn_weights, stored_weights)
    assert not (drawn_weights == weights_before).any()
def test_linear_approximator():
    """Exercise ``LinearApproximator`` through the ``Regressor`` wrapper.

    Covers three configurations: a generic multi-output regressor, an
    action regressor ensemble of five models, and a single action
    regressor whose gradient is checked. Predictions and gradients are
    compared against hard-coded reference values.
    """
    # The reference values are tied to this seed and to the order of the
    # random draws below, so the draw order must not change.
    np.random.seed(1)

    # Generic regressor
    inputs = np.random.rand(1000, 3)
    true_coeffs = np.random.rand(3, 2)
    noise = np.random.randn(1000, 2)
    outputs = np.dot(inputs, true_coeffs) + noise

    approximator = Regressor(LinearApproximator,
                             input_shape=(3, ),
                             output_shape=(2, ))
    approximator.fit(inputs, outputs)

    queries = np.random.rand(2, 3)
    expected = np.array([
        [0.57638247, 0.1573216],
        [0.11388247, 0.24123678],
    ])
    predicted = approximator.predict(queries)
    assert np.allclose(predicted, expected)

    # For each output column, the matching slice of the derivative must
    # equal the input point itself.
    point = np.random.randn(3, )
    derivative = approximator.diff(point)
    point_len = len(point)
    for col in range(derivative.shape[1]):
        start = col * point_len
        stop = (col + 1) * point_len
        assert (derivative[start:stop, col] == point).all()

    # Writing back the current weights must leave them untouched.
    weights_before = approximator.get_weights()
    approximator.set_weights(weights_before)
    weights_after = approximator.get_weights()
    assert np.array_equal(weights_after, weights_before)

    # Freshly drawn weights must be stored verbatim and differ everywhere
    # from the fitted ones.
    drawn_weights = np.random.randn(*weights_before.shape).astype(np.float32)
    approximator.set_weights(drawn_weights)
    stored_weights = approximator.get_weights()
    assert np.array_equal(drawn_weights, stored_weights)
    assert not (drawn_weights == weights_before).any()

    # Action regressor + Ensemble
    n_actions = 2
    states = np.random.rand(1000, 3)
    actions = np.random.randint(n_actions, size=(1000, 1))
    targets = np.random.rand(1000)

    approximator = Regressor(LinearApproximator,
                             input_shape=(3, ),
                             n_actions=n_actions,
                             n_models=5)
    approximator.fit(states, actions, targets)

    query_states = np.random.rand(2, 3)
    query_actions = np.random.randint(n_actions, size=(2, 1))

    expected_mean = np.array([0.49225698, 0.69660881])
    predicted_mean = approximator.predict(query_states, query_actions,
                                          prediction='mean')
    assert np.allclose(predicted_mean, expected_mean)

    expected_sum = np.array([2.46128492, 3.48304404])
    predicted_sum = approximator.predict(query_states, query_actions,
                                         prediction='sum')
    assert np.allclose(predicted_sum, expected_sum)

    expected_min = np.array([[0.49225698, 0.69660881]])
    predicted_min = approximator.predict(query_states, query_actions,
                                         prediction='min')
    assert np.allclose(predicted_min, expected_min)

    expected_all = np.array([
        [0.49225698, 0.44154141],
        [0.69660881, 0.69060195],
    ])
    predicted_all = approximator.predict(query_states)
    assert np.allclose(predicted_all, expected_all)

    # Single action regressor: gradient for one (state, action) pair.
    approximator = Regressor(LinearApproximator,
                             input_shape=(3, ),
                             n_actions=n_actions)
    approximator.fit(states, actions, targets)

    expected_grad = np.array([0.88471362, 0.11666548, 0.45466254,
                              0., 0., 0.])
    grad = approximator.diff(query_states[0], query_actions[0])
    assert np.allclose(grad, expected_grad)
def test_cmac_approximator():
    """Exercise the ``CMAC`` approximator through the ``Regressor`` wrapper.

    Covers a generic two-output regressor (prediction, derivative and
    weight round trip) and an action regressor ensemble of five models,
    comparing against hard-coded reference values.

    NOTE(review): this file defines another ``test_cmac_approximator``
    further down with different reference values. The later definition
    rebinds the name, so this one is presumably never collected by pytest.
    Confirm, then rename or remove one of the two.
    """
    # The reference values are tied to this seed and to the order of the
    # random draws below, so the draw order must not change.
    np.random.seed(1)

    # Generic regressor
    x = np.random.rand(1000, 2)

    k1 = np.random.rand(2)
    k2 = np.random.rand(2)

    y = np.array(
        [np.sin(x.dot(k1) * 2 * np.pi), np.sin(x.dot(k2) * 2 * np.pi)]).T

    tilings = Tiles.generate(10, [10, 10], np.zeros(2), np.ones(2))
    approximator = Regressor(CMAC,
                             tilings=tilings,
                             input_shape=(2, ),
                             output_shape=(2, ))

    approximator.fit(x, y)

    x = np.random.rand(2, 2)
    y_hat = approximator.predict(x)
    # The unused ground-truth array (``y_true``) was dropped: it was never
    # compared against anything and involved no random draws.
    y_test = np.array([[-0.73787754, 0.90673493],
                       [-0.94972964, -0.72380013]])

    assert np.allclose(y_hat, y_test)

    point = np.random.rand(2)
    derivative = approximator.diff(point)

    # Each output column of the derivative must sum to 10.
    assert np.array_equal(np.sum(derivative, axis=0), np.ones(2) * 10)
    assert len(derivative) == approximator.weights_size

    # Writing back the current weights must leave them untouched.
    old_weights = approximator.get_weights()
    approximator.set_weights(old_weights)
    new_weights = approximator.get_weights()

    assert np.array_equal(new_weights, old_weights)

    # Freshly drawn weights must be stored verbatim and differ everywhere
    # from the fitted ones.
    random_weights = np.random.randn(*old_weights.shape).astype(np.float32)
    approximator.set_weights(random_weights)
    random_weight_new = approximator.get_weights()

    assert np.array_equal(random_weights, random_weight_new)
    assert not np.any(np.equal(random_weights, old_weights))

    # Action regressor + Ensemble
    n_actions = 2
    s = np.random.rand(1000, 3)
    a = np.random.randint(n_actions, size=(1000, 1))
    q = np.random.rand(1000)

    tilings = Tiles.generate(10, [10, 10, 10], np.zeros(3), np.ones(3))
    approximator = Regressor(CMAC,
                             tilings=tilings,
                             input_shape=(3, ),
                             n_actions=n_actions,
                             n_models=5)

    approximator.fit(s, a, q)

    x_s = np.random.rand(2, 3)
    x_a = np.random.randint(n_actions, size=(2, 1))

    y = approximator.predict(x_s, x_a, prediction='mean')
    y_test = np.array([[0.10921918, 0.09923379]])
    assert np.allclose(y, y_test)

    y = approximator.predict(x_s, x_a, prediction='sum')
    y_test = np.array([0.54609592, 0.49616895])
    assert np.allclose(y, y_test)

    y = approximator.predict(x_s, x_a, prediction='min')
    y_test = np.array([[0.10921918, 0.09923379]])
    assert np.allclose(y, y_test)

    y = approximator.predict(x_s)
    y_test = np.array([[0.07606651, 0.10921918],
                       [0.40698114, 0.09923379]])
    assert np.allclose(y, y_test)
def test_cmac_approximator():
    """Exercise the ``CMAC`` approximator through the ``Regressor`` wrapper.

    Covers a generic two-output regressor (prediction, derivative and
    weight round trip) and an action regressor ensemble of five models,
    comparing against hard-coded reference values.

    NOTE(review): an earlier function with the same name exists in this
    file. This later definition rebinds the name, so it is presumably the
    one pytest runs. Confirm, then rename or remove the earlier one.
    """
    # The reference values are tied to the seeds used here and to the
    # order of the random draws below, so the draw order must not change.
    np.random.seed(1)

    # Generic regressor
    x = np.random.rand(1000, 2)

    k1 = np.random.rand(2)
    k2 = np.random.rand(2)

    y = np.array(
        [np.sin(x.dot(k1) * 2 * np.pi), np.sin(x.dot(k2) * 2 * np.pi)]).T

    tilings = Tiles.generate(10, [10, 10], np.zeros(2), np.ones(2))
    approximator = Regressor(CMAC,
                             tilings=tilings,
                             input_shape=(2, ),
                             output_shape=(2, ))

    approximator.fit(x, y)

    x = np.random.rand(2, 2)
    y_hat = approximator.predict(x)
    # The unused ground-truth array (``y_true``) was dropped: it was never
    # compared against anything and involved no random draws.
    y_test = np.array([[-0.73581504, 0.90877225],
                       [-0.95854488, -0.72429239]])

    assert np.allclose(y_hat, y_test)

    point = np.random.rand(2)
    derivative = approximator.diff(point)

    # Each output column of the derivative must sum to 10.
    assert np.array_equal(np.sum(derivative, axis=0), np.ones(2) * 10)
    assert len(derivative) == approximator.weights_size

    # Writing back the current weights must leave them untouched.
    old_weights = approximator.get_weights()
    approximator.set_weights(old_weights)
    new_weights = approximator.get_weights()

    assert np.array_equal(new_weights, old_weights)

    # Freshly drawn weights must be stored verbatim and differ everywhere
    # from the fitted ones.
    random_weights = np.random.randn(*old_weights.shape).astype(np.float32)
    approximator.set_weights(random_weights)
    random_weight_new = approximator.get_weights()

    assert np.array_equal(random_weights, random_weight_new)
    assert not np.any(np.equal(random_weights, old_weights))

    # Action regressor + Ensemble
    n_actions = 2
    s = np.random.rand(1000, 3)
    a = np.random.randint(n_actions, size=(1000, 1))
    q = np.random.rand(1000)

    tilings = Tiles.generate(10, [10, 10, 10], np.zeros(3), np.ones(3))
    approximator = Regressor(CMAC,
                             tilings=tilings,
                             input_shape=(3, ),
                             n_actions=n_actions,
                             n_models=5)

    approximator.fit(s, a, q)

    # Re-seed before drawing the query points; the reference values below
    # were recorded with this second seed.
    np.random.seed(2)
    x_s = np.random.rand(2, 3)
    x_a = np.random.randint(n_actions, size=(2, 1))

    y = approximator.predict(x_s, x_a, prediction='mean')
    y_test = np.array([[0.56235045, 0.25080909]])
    assert np.allclose(y, y_test)

    y = approximator.predict(x_s, x_a, prediction='sum')
    y_test = np.array([2.81175226, 1.25404543])
    assert np.allclose(y, y_test)

    y = approximator.predict(x_s, x_a, prediction='min')
    y_test = np.array([0.56235045, 0.25080909])
    assert np.allclose(y, y_test)

    y = approximator.predict(x_s)
    y_test = np.array([[0.10367145, 0.56235045],
                       [0.05575822, 0.25080909]])
    assert np.allclose(y, y_test)