# Example #1 (score: 0)
def test_pytorch_approximator():
    """Fit a PyTorchApproximator regressor on noisy sinusoidal data and
    check prediction accuracy, gradient shape, and that the weight
    get/set interface round-trips exactly.

    Note: every random call must stay in this exact order so the seeded
    RNG streams reproduce the original fixture.
    """
    np.random.seed(88)
    torch.manual_seed(88)

    noise = 1e-3 ** 2

    inputs = np.random.rand(1000, 4)
    true_k = np.random.rand(4, 2)
    targets = np.sin(inputs).dot(true_k) + np.random.randn(1000, 2) * noise

    approximator = Regressor(
        PyTorchApproximator,
        input_shape=(4, ),
        output_shape=(2, ),
        network=ExampleNet,
        optimizer={'class': optim.Adam, 'params': {}},
        loss=F.mse_loss,
        n_neurons=100,
        n_hidden=1,
        n_epochs=200,
        batch_size=100,
        quiet=True)

    approximator.fit(inputs, targets)

    predictions = approximator.predict(inputs)
    error = np.linalg.norm(targets - predictions, 'fro') / 1000
    error_inf = np.max(np.abs(targets - predictions))

    print(targets[:10])
    print(predictions[:10])
    print(error_inf)

    assert error < 2e-4

    # One gradient column per output dimension.
    gradient = approximator.diff(inputs[0])
    assert gradient.shape[1] == 2

    # Setting the current weights back must be an exact round-trip.
    w_old = approximator.get_weights()
    approximator.set_weights(w_old)
    w_new = approximator.get_weights()
    assert np.array_equal(w_new, w_old)

    # Fresh random weights must replace every old entry.
    w_random = np.random.randn(*w_old.shape).astype(np.float32)
    approximator.set_weights(w_random)
    w_random_readback = approximator.get_weights()
    assert np.array_equal(w_random, w_random_readback)
    assert not np.any(np.equal(w_random, w_old))

    # Different weights must produce different predictions.
    predictions_random = approximator.predict(inputs)
    assert not np.array_equal(predictions, predictions_random)
def test_pytorch_approximator():
    """Fit a TorchApproximator action regressor on random Q-values and
    verify predictions, gradients, and weight get/set round-trips
    against stored reference values.

    Note: every random call must stay in this exact order so the seeded
    RNG streams reproduce the original fixture.
    """
    np.random.seed(1)
    torch.manual_seed(1)

    n_actions = 2
    states = np.random.rand(1000, 4)
    actions = np.random.randint(n_actions, size=(1000, 1))
    q_targets = np.random.rand(1000)

    approximator = Regressor(
        TorchApproximator,
        input_shape=(4,),
        output_shape=(2,),
        n_actions=n_actions,
        network=ExampleNet,
        optimizer={'class': optim.Adam, 'params': {}},
        loss=F.mse_loss,
        batch_size=100,
        quiet=True)

    approximator.fit(states, actions, q_targets, n_epochs=20)

    x_s = np.random.rand(2, 4)
    x_a = np.random.randint(n_actions, size=(2, 1))

    # Q-values for the given state-action pairs.
    assert np.allclose(approximator.predict(x_s, x_a),
                       np.array([0.37191153, 0.5920861]))

    # Q-values for all actions at once.
    assert np.allclose(approximator.predict(x_s),
                       np.array([[0.47908658, 0.37191153],
                                 [0.5920861, 0.27575058]]))

    # Gradient for a single state-action pair.
    gradient = approximator.diff(x_s[0], x_a[0])
    assert np.allclose(gradient,
                       np.array([0., 0., 0., 0., 0.02627479, 0.76513696,
                                 0.6672573, 0.35979462, 0., 1.]))

    # Gradient for the state only: one column per action.
    gradient = approximator.diff(x_s[0])
    assert np.allclose(gradient,
                       np.array([[0.02627479, 0.], [0.76513696, 0.],
                                 [0.6672573, 0.], [0.35979462, 0.],
                                 [0., 0.02627479], [0., 0.76513696],
                                 [0., 0.6672573], [0., 0.35979462],
                                 [1, 0.], [0., 1.]]))

    # Setting the current weights back must be an exact round-trip.
    w_old = approximator.get_weights()
    approximator.set_weights(w_old)
    w_new = approximator.get_weights()
    assert np.array_equal(w_new, w_old)

    # Fresh random weights must replace every old entry.
    w_random = np.random.randn(*w_old.shape).astype(np.float32)
    approximator.set_weights(w_random)
    w_random_readback = approximator.get_weights()
    assert np.array_equal(w_random, w_random_readback)
    assert not np.any(np.equal(w_random, w_old))
# Example #3 (score: 0)
def test_linear_approximator():
    """Fit a LinearApproximator on noisy linear data: recovered weights
    must be close to the generating matrix, and diff() must return the
    input replicated per output dimension (block-diagonal layout)."""
    np.random.seed(88)

    noise = 1e-3

    features = np.random.rand(1000, 3)
    true_k = np.random.rand(3, 2)
    targets = features.dot(true_k) + np.random.randn(1000, 2) * noise

    approximator = Regressor(LinearApproximator,
                             input_shape=(3, ),
                             output_shape=(2, ))
    approximator.fit(features, targets)

    # Learned weights should match the generating matrix up to the noise.
    k_hat = approximator.get_weights()
    delta = k_hat - true_k.T.flatten()
    assert np.linalg.norm(delta) < noise

    # Each output's gradient slice is a copy of the input point.
    point = np.random.randn(3, )
    derivative = approximator.diff(point)
    dim = len(point)
    assert all((derivative[j * dim:(j + 1) * dim, j] == point).all()
               for j in range(derivative.shape[1]))
def test_deterministic_policy():
    """DeterministicPolicy checks: weights are shared with the wrapped
    regressor, the policy is a delta on the regressor's own prediction,
    and draw_action matches a stored reference."""
    np.random.seed(88)

    n_dims = 5

    regressor = Regressor(LinearApproximator,
                          input_shape=(n_dims, ),
                          output_shape=(2, ))
    pi = DeterministicPolicy(regressor)

    w_new = np.random.rand(pi.weights_size)
    w_old = pi.get_weights()
    pi.set_weights(w_new)

    # Setting weights on the policy must propagate to the regressor.
    assert np.array_equal(w_new, regressor.get_weights())
    assert not np.array_equal(w_old, w_new)
    assert np.array_equal(w_new, pi.get_weights())

    s_test_1 = np.random.randn(5)
    s_test_2 = np.random.randn(5)
    a_test = regressor.predict(s_test_1)

    assert pi.get_regressor() == regressor

    # Probability is 1 only for the action the regressor itself outputs.
    assert pi(s_test_1, a_test) == 1
    assert pi(s_test_2, a_test) == 0

    reference_action = np.array([-1.86941072, -0.1789696])
    assert np.allclose(pi.draw_action(s_test_1), reference_action)
def test_linear_approximator():
    """Exercise LinearApproximator three ways: as a generic regressor,
    as an action regressor backed by an ensemble, and as a plain action
    regressor (gradient check). Reference values are tied to the seed.

    Note: every random call must stay in this exact order so the seeded
    RNG stream reproduces the original fixture.
    """
    np.random.seed(1)

    # --- Generic regressor ---
    features = np.random.rand(1000, 3)
    true_k = np.random.rand(3, 2)
    targets = features.dot(true_k) + np.random.randn(1000, 2)

    approximator = Regressor(LinearApproximator,
                             input_shape=(3, ),
                             output_shape=(2, ))
    approximator.fit(features, targets)

    x_test = np.random.rand(2, 3)
    assert np.allclose(approximator.predict(x_test),
                       np.array([[0.57638247, 0.1573216],
                                 [0.11388247, 0.24123678]]))

    # Each output's gradient slice is a copy of the input point.
    point = np.random.randn(3, )
    derivative = approximator.diff(point)
    dim = len(point)
    assert all((derivative[j * dim:(j + 1) * dim, j] == point).all()
               for j in range(derivative.shape[1]))

    # Setting the current weights back must be an exact round-trip.
    w_old = approximator.get_weights()
    approximator.set_weights(w_old)
    w_new = approximator.get_weights()
    assert np.array_equal(w_new, w_old)

    # Fresh random weights must replace every old entry.
    w_random = np.random.randn(*w_old.shape).astype(np.float32)
    approximator.set_weights(w_random)
    w_random_readback = approximator.get_weights()
    assert np.array_equal(w_random, w_random_readback)
    assert not np.any(np.equal(w_random, w_old))

    # --- Action regressor + Ensemble ---
    n_actions = 2
    s = np.random.rand(1000, 3)
    a = np.random.randint(n_actions, size=(1000, 1))
    q = np.random.rand(1000)

    approximator = Regressor(LinearApproximator,
                             input_shape=(3, ),
                             n_actions=n_actions,
                             n_models=5)
    approximator.fit(s, a, q)

    x_s = np.random.rand(2, 3)
    x_a = np.random.randint(n_actions, size=(2, 1))

    # Ensemble aggregation modes over the 5 models.
    assert np.allclose(approximator.predict(x_s, x_a, prediction='mean'),
                       np.array([0.49225698, 0.69660881]))
    assert np.allclose(approximator.predict(x_s, x_a, prediction='sum'),
                       np.array([2.46128492, 3.48304404]))
    assert np.allclose(approximator.predict(x_s, x_a, prediction='min'),
                       np.array([[0.49225698, 0.69660881]]))
    assert np.allclose(approximator.predict(x_s),
                       np.array([[0.49225698, 0.44154141],
                                 [0.69660881, 0.69060195]]))

    # --- Plain action regressor: gradient is zero for the other action ---
    approximator = Regressor(LinearApproximator,
                             input_shape=(3, ),
                             n_actions=n_actions)
    approximator.fit(s, a, q)

    gradient = approximator.diff(x_s[0], x_a[0])
    assert np.allclose(gradient,
                       np.array([0.88471362, 0.11666548, 0.45466254,
                                 0., 0., 0.]))