Example No. 1
def test_features():
    # Test that optimisation runs for a full-fledged GPAR.
    reg = GPARRegressor(replace=True, scale=1.0,
                        per=True, per_period=1.0, per_decay=10.0,
                        input_linear=True, input_linear_scale=0.1,
                        linear=True, linear_scale=1.0,
                        nonlinear=True, nonlinear_scale=1.0,
                        rq=True, noise=0.1)
    x = np.stack([np.linspace(0, 10, 20),
                  np.linspace(10, 20, 20)], axis=1)
    y = reg.sample(x, p=2)
    reg.fit(x, y, iters=10)
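A hedged follow-up sketch, not part of the original test: once `fit` has run, predictions with credible bounds can be drawn through the `predict` API that Examples 6 and 16 demonstrate (the sample count below is an arbitrary choice).

means, lowers, uppers = reg.predict(x, num_samples=50,
                                    credible_bounds=True, latent=True)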
Example No. 2
def test_logpdf_differentiable():
    reg = GPARRegressor(replace=False, impute=False,
                        linear=True, linear_scale=1., nonlinear=False,
                        noise=1e-8, normalise_y=False)
    x = np.linspace(0, 5, 10)
    y = reg.sample(x, p=2, latent=True)

    # Test that gradient calculation works.
    reg.vs.requires_grad(True)
    for var in reg.vs.get_vars():
        assert var.grad is None
    reg.logpdf(torch.tensor(x), torch.tensor(y)).backward()
    for var in reg.vs.get_vars():
        assert var.grad is not None
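A hedged sketch of how the gradients checked above could drive one manual optimisation step; the learning rate and the in-place update are illustrative, not part of the test suite or a documented optimiser API.

# One plain gradient-descent step over the variables exposed by `reg.vs`.
with torch.no_grad():
    for var in reg.vs.get_vars():
        var -= 1e-2 * var.grad  # illustrative learning rate
        var.grad.zero_()        # reset for the next backward pass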
Example No. 3
def test_logpdf(x, w):
    # Sample some data from a "sensitive" GPAR.
    reg = GPARRegressor(
        replace=False,
        impute=False,
        nonlinear=True,
        nonlinear_scale=0.1,
        linear=True,
        linear_scale=10.0,
        noise=1e-2,
        normalise_y=False,
    )
    y = reg.sample(x, w, p=2, latent=True)

    # Extract models.
    gpar = _construct_gpar(reg, reg.vs, B.shape(B.uprank(x))[1], 2)
    f1, e1 = gpar.layers[0]()
    f2, e2 = gpar.layers[1]()

    # Test computation under prior.
    x1 = x
    x2 = B.concat(B.uprank(x), y[:, 0:1], axis=1)
    if w is not None:
        x1 = WeightedUnique(x1, w[:, 0])
        x2 = WeightedUnique(x2, w[:, 1])
    logpdf1 = (f1 + e1)(x1).logpdf(y[:, 0])
    logpdf2 = (f2 + e2)(x2).logpdf(y[:, 1])
    approx(reg.logpdf(x, y, w), logpdf1 + logpdf2, atol=1e-6)

    # Test computation under posterior.
    post1 = f1.measure | ((f1 + e1)(x1), y[:, 0])
    post2 = f2.measure | ((f2 + e2)(x2), y[:, 1])
    e1_post = GP(e1.mean, e1.kernel, measure=post1)
    e2_post = GP(e2.mean, e2.kernel, measure=post2)
    logpdf1 = (post1(f1) + e1_post)(x1).logpdf(y[:, 0])
    logpdf2 = (post2(f2) + e2_post)(x2).logpdf(y[:, 1])
    with pytest.raises(RuntimeError):
        reg.logpdf(x, y, w, posterior=True)
    reg.condition(x, y, w)
    approx(reg.logpdf(x, y, w, posterior=True), logpdf1 + logpdf2, atol=1e-6)

    # Test that sampling missing values gives a stochastic estimate.
    y[::2, 0] = np.nan
    all_different(
        reg.logpdf(x, y, w, sample_missing=True),
        reg.logpdf(x, y, w, sample_missing=True),
    )
Example No. 4
def test_logpdf(x, w):
    # Sample some data from a "sensitive" GPAR.
    reg = GPARRegressor(
        replace=False,
        impute=False,
        nonlinear=True,
        nonlinear_scale=0.1,
        linear=True,
        linear_scale=10.0,
        noise=1e-2,
        normalise_y=False,
    )
    y = reg.sample(x, w, p=2, latent=True)

    # Extract models.
    gpar = _construct_gpar(reg, reg.vs, B.shape(B.uprank(x))[1], 2)
    f1, noise1 = gpar.layers[0]()
    f2, noise2 = gpar.layers[1]()

    if w is not None:
        noise1 = noise1 / w[:, 0]
        noise2 = noise2 / w[:, 1]

    # Test computation under prior.
    x1 = x
    x2 = B.concat(B.uprank(x), y[:, 0:1], axis=1)
    logpdf1 = f1(x1, noise1).logpdf(y[:, 0])
    logpdf2 = f2(x2, noise2).logpdf(y[:, 1])
    approx(reg.logpdf(x, y, w), logpdf1 + logpdf2, atol=1e-6)

    # Test computation under posterior.
    f1_post = f1 | (f1(x1, noise1), y[:, 0])
    f2_post = f2 | (f2(x2, noise2), y[:, 1])
    logpdf1 = f1_post(x1, noise1).logpdf(y[:, 0])
    logpdf2 = f2_post(x2, noise2).logpdf(y[:, 1])
    with pytest.raises(RuntimeError):
        reg.logpdf(x, y, w, posterior=True)
    reg.condition(x, y, w)
    approx(reg.logpdf(x, y, w, posterior=True), logpdf1 + logpdf2, atol=1e-6)

    # Test that sampling missing values gives a stochastic estimate.
    y[::2, 0] = np.nan
    all_different(
        reg.logpdf(x, y, w, sample_missing=True),
        reg.logpdf(x, y, w, sample_missing=True),
    )
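A small worked sketch of the weighting rule used above; it only restates `noise / w` with numbers: a point with weight 2 is modelled with half the observation noise variance, so it is trusted more.

noise = 1e-2        # noise variance of the regressor above
w_i = 2.0           # hypothetical weight for one data point
print(noise / w_i)  # 0.005: effective noise variance at that point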
Example No. 5
def test_logpdf():
    # Sample some data from a "sensitive" GPAR.
    reg = GPARRegressor(replace=False,
                        impute=False,
                        nonlinear=True,
                        nonlinear_scale=0.1,
                        linear=True,
                        linear_scale=10.,
                        noise=1e-4,
                        normalise_y=False)
    x = np.linspace(0, 5, 10)
    y = reg.sample(x, p=2, latent=True)

    # Extract models.
    gpar = _construct_gpar(reg, reg.vs, 1, 2)
    f1, e1 = gpar.layers[0]()
    f2, e2 = gpar.layers[1]()

    # Test computation under prior.
    logpdf1 = (f1 + e1)(B.array(x)).logpdf(B.array(y[:, 0]))
    x_stack = np.concatenate([x[:, None], y[:, 0:1]], axis=1)
    logpdf2 = (f2 + e2)(B.array(x_stack)).logpdf(B.array(y[:, 1]))
    yield approx, reg.logpdf(x, y), logpdf1 + logpdf2, 6

    # Test computation under posterior.
    e1_post = GP(e1.kernel, e1.mean, graph=e1.graph)
    e2_post = GP(e2.kernel, e2.mean, graph=e2.graph)
    f1_post = f1 | ((f1 + e1)(B.array(x)), B.array(y[:, 0]))
    f2_post = f2 | ((f2 + e2)(B.array(x_stack)), B.array(y[:, 1]))
    logpdf1 = (f1_post + e1_post)(B.array(x)).logpdf(B.array(y[:, 0]))
    logpdf2 = (f2_post + e2_post)(B.array(x_stack)).logpdf(B.array(y[:, 1]))
    yield raises, RuntimeError, lambda: reg.logpdf(x, y, posterior=True)
    reg.fit(x, y, iters=0)
    yield approx, reg.logpdf(x, y, posterior=True), logpdf1 + logpdf2, 6

    # Test that sampling missing values gives a stochastic estimate.
    y[::2, 0] = np.nan
    yield ge, \
          np.abs(reg.logpdf(x, y, sample_missing=True) -
                 reg.logpdf(x, y, sample_missing=True)), \
          1e-3
Example No. 6
    # Draw functions depending on each other in complicated ways.
    f1 = -np.sin(10 * np.pi * (x + 1)) / (2 * x + 1) - x**4
    f2 = np.cos(f1)**2 + np.sin(3 * x)
    f3 = f2 * f1**2 + 3 * x
    f = np.stack((f1, f2, f3), axis=0).T

    # Add noise and subsample.
    y = f + noise * np.random.randn(n, 3)
    x_obs, y_obs = x[::8], y[::8]

    # Fit and predict GPAR.
    model = GPARRegressor(scale=0.1,
                          linear=True,
                          linear_scale=10.,
                          nonlinear=True,
                          nonlinear_scale=0.1,
                          noise=0.1,
                          impute=True,
                          replace=False,
                          normalise_y=False)
    model.fit(x_obs, y_obs)
    means, lowers, uppers = \
        model.predict(x, num_samples=100, credible_bounds=True, latent=True)

    # Fit and predict independent GPs: set markov=0.
    igp = GPARRegressor(scale=0.1,
                        linear=True,
                        linear_scale=10.,
                        nonlinear=True,
                        nonlinear_scale=0.1,
                        noise=0.1,
Example No. 7
def test_sample_and_predict():
    reg = GPARRegressor(replace=False,
                        impute=False,
                        linear=True,
                        linear_scale=1.,
                        nonlinear=False,
                        noise=1e-8,
                        normalise_y=False)
    x = np.linspace(0, 5, 10)

    # Test checks.
    yield raises, ValueError, lambda: reg.sample(x)
    yield raises, RuntimeError, lambda: reg.sample(x, posterior=True)

    # Test that output is simplified correctly.
    yield isinstance, reg.sample(x, p=2), np.ndarray
    yield isinstance, reg.sample(x, p=2, num_samples=2), list

    # Test that it produces random samples. Not sure how to test correctness.
    yield ge, np.sum(np.abs(reg.sample(x, p=2) - reg.sample(x, p=2))), 1e-2
    yield ge, np.sum(
        np.abs(
            reg.sample(x, p=2, latent=True) -
            reg.sample(x, p=2, latent=True))), 1e-3

    # Test that the mean of the posterior samples is around the data.
    y = reg.sample(x, p=2)
    reg.fit(x, y, iters=0)
    yield approx, y, np.mean(reg.sample(x, posterior=True, num_samples=20),
                             axis=0), 4
    yield approx, y, np.mean(reg.sample(x,
                                        latent=True,
                                        posterior=True,
                                        num_samples=20),
                             axis=0), 4

    # Test that prediction is around the data.
    yield approx, y, reg.predict(x, num_samples=20), 4
    yield approx, y, reg.predict(x, latent=True, num_samples=20), 4

    # Test that prediction is confident.
    _, lowers, uppers = reg.predict(x, num_samples=10, credible_bounds=True)
    yield ok, np.less_equal(uppers - lowers, 1e-3).all()
Example No. 8
def test_inducing_points_uprank():
    reg = GPARRegressor(x_ind=np.linspace(0, 10, 20))
    assert reg.x_ind is not None
    assert B.rank(reg.x_ind) == 2
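The test above only asserts the rank; a hedged sketch of the implied shapes, where the `(20, 1)` column shape is an inference from `B.rank(reg.x_ind) == 2` rather than something the test prints:

reg = GPARRegressor(x_ind=np.linspace(0, 10, 20))  # `x_ind` given with shape (20,)
print(B.shape(reg.x_ind))  # expected: (20, 1), upranked to one input column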
Example No. 9
def test_get_variables():
    gpar = GPARRegressor()
    gpar.vs.get(init=1.0, name='variable')
    assert list(gpar.get_variables().items()) == [('variable', 1.0)]
Example No. 10
    args.experiment = 'synthetic'

    x1 = np.linspace(1, 5, 5)
    x2 = np.concatenate([
        np.linspace(1, 9, 9),
        np.linspace(10, 98, 45),
        np.linspace(100, 980, 45)
    ])
    xx1, xx2 = np.meshgrid(x1, x2)
    x = np.stack([np.ravel(xx1), np.ravel(xx2)], axis=1)

    model = GPARRegressor(scale=[2., .3],
                          scale_tie=True,
                          linear=True,
                          linear_scale=10.,
                          linear_with_inputs=False,
                          nonlinear=False,
                          nonlinear_with_inputs=False,
                          markov=1,
                          replace=True,
                          noise=args.noise)

    n = 10

    y = model.sample(transform_x(x), p=n, latent=False)

else:
    # Load data.
    x = np.genfromtxt(
        os.path.join(data_dir, args.data, args.experiment,
                     f'x_{"rmse" if args.rmse else "loglik"}.txt'))
    y = np.genfromtxt(
Example No. 11
def test_scale_tying():
    reg = GPARRegressor(scale_tie=True)
    reg.sample(np.linspace(0, 10, 20), p=2)  # Instantiate variables.
    vs = reg.get_variables()
    assert '0/input/scales' in vs
    assert '1/input/scales' not in vs
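A hedged sketch of the untied counterpart, assuming that without scale tying each output keeps its own input scales; the test above only establishes the tied case, so the second assertion below is an inference, not documented behaviour.

reg = GPARRegressor(scale_tie=False)
reg.sample(np.linspace(0, 10, 20), p=2)  # Instantiate variables.
vs = reg.get_variables()
assert '0/input/scales' in vs
assert '1/input/scales' in vs  # inferred: per-output scales when untied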
Example No. 12
def test_get_variables():
    gpar = GPARRegressor()
    gpar.vs.get(init=1.0, name='variable')
    yield eq, list(gpar.get_variables().items()), [('variable', 1.0)]
Example No. 13
    x2 = np.concatenate([
        np.linspace(1, 9, 9),
        np.linspace(10, 95, 18),
        np.linspace(100, 950, 18)
    ])
    xx1, xx2 = np.meshgrid(x1, x2)
    x = np.stack([np.ravel(xx1), np.ravel(xx2)], axis=1)

    if len(args.synthetic_scales) != 2:
        raise ValueError('Requires exactly 2 synthetic scale arguments')

    model = GPARRegressor(scale=args.synthetic_scales,
                          scale_tie=True,
                          linear=True,
                          linear_scale=10.,
                          input_linear=False,
                          nonlinear=False,
                          markov=None,
                          replace=True,
                          noise=args.noise)

    n = 3

    y = model.sample(transform_x(x), p=n, latent=False)

else:
    # Load data.
    x = np.genfromtxt(
        os.path.join(data_dir, args.data, args.experiment,
                     f'x_{"rmse" if args.rmse else "loglik"}.txt'))
    y = np.genfromtxt(
Example No. 14
def test_fit():
    reg = GPARRegressor(replace=False,
                        impute=False,
                        normalise_y=True,
                        transform_y=squishing_transform)
    x = np.linspace(0, 5, 10)
    y = reg.sample(x, p=2)

    # TODO: Remove this once greedy search is implemented.
    yield raises, NotImplementedError, lambda: reg.fit(x, y, greedy=True)

    # Test that data is correctly transformed if it has an output with zero
    # variance.
    reg.fit(x, y, iters=0)
    yield ok, (~B.isnan(reg.y)).numpy().all()
    y_pathological = y.copy()
    y_pathological[:, 0] = 1
    reg.fit(x, y_pathological, iters=0)
    yield ok, (~B.isnan(reg.y)).numpy().all()

    # Test transformation and normalisation of outputs.
    z = B.linspace(-1, 1, 10, dtype=torch.float64)
    z = B.stack([z, 2 * z], axis=1)
    yield allclose, reg._untransform_y(reg._transform_y(z)), z
    yield allclose, reg._unnormalise_y(reg._normalise_y(z)), z

    # Test that fitting runs without issues.
    vs = reg.vs.detach()
    yield lambda x_, y_: reg.fit(x_, y_, fix=False), x, y
    reg.vs = vs
    yield lambda x_, y_: reg.fit(x_, y_, fix=True), x, y
Example No. 15
    f'{"output_linear-" if args.output_linear else ""}'
    f'{"output_nonlinear-" if args.output_nonlinear else ""}'
    f'markov_{args.markov}-'
    f'{args.seed}')

if os.path.isfile(fig_path + '.png'):
    print('Experiment already run. Exiting')
    sys.exit(0)

np.random.seed(args.seed)

model = GPARRegressor(scale=[1., .5],
                      scale_tie=args.scale_tie,
                      linear=args.output_linear,
                      linear_scale=10.,
                      input_linear=args.input_linear,
                      input_linear_scale=10.,
                      nonlinear=args.output_nonlinear,
                      markov=args.markov,
                      replace=True,
                      noise=0.01)

# model2 = GPARRegressor(scale=[1., .5], scale_tie=True,
#                           linear=True, linear_scale=10., input_linear=False,
#                           nonlinear=False,
#                           markov=1,
#                           replace=True,
#                           noise=0.01)

# Load data.
formatting_args = (data_dir, args.data, args.experiment,
                   'rmse' if args.rmse else 'loglik')
Example No. 16
import matplotlib.pyplot as plt
import numpy as np
import wbml.plot

from gpar.regression import GPARRegressor

x = np.linspace(0, 1, 100)
model = GPARRegressor(scale=0.1,
                      linear=False, nonlinear=True, nonlinear_scale=0.5,
                      impute=True, replace=True,
                      noise=0.1, normalise_y=True)

# Sample observations and discard some.
y = model.sample(x, p=3)
y_obs = y.copy()
y_obs[np.random.permutation(100)[:25], 0] = np.nan
y_obs[np.random.permutation(100)[:50], 1] = np.nan
y_obs[np.random.permutation(100)[:75], 2] = np.nan

# Fit model and predict.
model.fit(x, y_obs)
means, lowers, uppers = \
    model.predict(x, num_samples=200, latent=False, credible_bounds=True)

# Plot the result.
plt.figure(figsize=(8, 6))

for i in range(3):
    plt.subplot(3, 1, i + 1)
    plt.plot(x, means[:, i], label='Prediction', style='pred')
    plt.fill_between(x, lowers[:, i], uppers[:, i], style='pred')
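    # Hedged completion of the loop, which the excerpt cuts off: show the
    # retained observations and finish the figure. These plain matplotlib
    # calls are assumptions about how the original example ends.
    plt.scatter(x, y_obs[:, i], label='Observations', marker='o', s=8)
    plt.legend()
plt.show()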
Example No. 17
def test_sample_and_predict(x, w):
    # Use an output transform to ensure that it is handled correctly.
    reg = GPARRegressor(
        replace=False,
        impute=False,
        linear=True,
        linear_scale=1.0,
        nonlinear=False,
        noise=1e-8,
        normalise_y=False,
        transform_y=squishing_transform,
    )

    # Test checks.
    with pytest.raises(ValueError):
        reg.sample(x, w)
    with pytest.raises(RuntimeError):
        reg.sample(x, w, posterior=True)

    # Test that output is simplified correctly.
    assert isinstance(reg.sample(x, w, p=2), np.ndarray)
    assert isinstance(reg.sample(x, w, p=2, num_samples=2), list)

    # Test that it produces random samples. Not sure how to test correctness.
    all_different(reg.sample(x, w, p=2), reg.sample(x, w, p=2))
    all_different(reg.sample(x, w, p=2, latent=True),
                  reg.sample(x, w, p=2, latent=True))

    # Test that the mean of the posterior samples is around the data.
    y = reg.sample(x, w, p=2)
    reg.condition(x, y, w)
    approx(y,
           np.mean(reg.sample(x, w, posterior=True, num_samples=100), axis=0),
           atol=5e-2)
    approx(
        y,
        np.mean(reg.sample(x, w, latent=True, posterior=True, num_samples=100),
                axis=0),
        atol=5e-2,
    )

    # Test that prediction is around the data.
    approx(y, reg.predict(x, w, num_samples=100), atol=5e-2)
    approx(y, reg.predict(x, w, latent=True, num_samples=100), atol=5e-2)

    # Test that prediction is confident.
    _, lowers, uppers = reg.predict(x,
                                    w,
                                    num_samples=100,
                                    credible_bounds=True)
    approx(uppers, lowers, atol=5e-2)
Example No. 18
def test_scale_tying(x, w):
    reg = GPARRegressor(scale_tie=True)
    reg.sample(x, w, p=2)  # Instantiate variables.
    vs = reg.get_variables()
    assert "0/input/scales" in vs
    assert "1/input/scales" not in vs
Example No. 19
def test_scale_tying():
    reg = GPARRegressor(scale_tie=True)
    reg.sample(np.linspace(0, 10, 20), p=2)  # Instantiate variables.
    vs = reg.get_variables()
    yield ok, (0, 'I/scales') in vs
    yield ok, (1, 'I/scales') not in vs
Example No. 20
def test_sample_and_predict():
    # Use an output transform to ensure that it is handled correctly.
    reg = GPARRegressor(replace=False, impute=False,
                        linear=True, linear_scale=1., nonlinear=False,
                        noise=1e-8, normalise_y=False,
                        transform_y=squishing_transform)
    x = np.linspace(0, 5, 5)

    # Test checks.
    with pytest.raises(ValueError):
        reg.sample(x)
    with pytest.raises(RuntimeError):
        reg.sample(x, posterior=True)

    # Test that output is simplified correctly.
    assert isinstance(reg.sample(x, p=2), np.ndarray)
    assert isinstance(reg.sample(x, p=2, num_samples=2), list)

    # Test that it produces random samples. Not sure how to test correctness.
    assert np.sum(np.abs(reg.sample(x, p=2) - reg.sample(x, p=2))) >= 1e-2
    assert np.sum(np.abs(reg.sample(x, p=2, latent=True) -
                         reg.sample(x, p=2, latent=True))) >= 1e-3

    # Test that the mean of the posterior samples is around the data.
    y = reg.sample(x, p=2)
    reg.condition(x, y)
    approx(y, np.mean(reg.sample(x,
                                 posterior=True,
                                 num_samples=100), axis=0), digits=3)
    approx(y, np.mean(reg.sample(x,
                                 latent=True,
                                 posterior=True,
                                 num_samples=100), axis=0), digits=3)

    # Test that prediction is around the data.
    approx(y, reg.predict(x, num_samples=100), digits=3)
    approx(y, reg.predict(x, latent=True, num_samples=100), digits=3)

    # Test that prediction is confident.
    _, lowers, uppers = reg.predict(x, num_samples=100, credible_bounds=True)
    assert np.less_equal(uppers - lowers, 1e-2).all()
Example No. 21
def test_condition_and_fit():
    reg = GPARRegressor(replace=False, impute=False,
                        normalise_y=True, transform_y=squishing_transform)
    x = np.linspace(0, 5, 10)
    y = reg.sample(x, p=2)

    # Test that data is correctly normalised.
    reg.condition(x, y)
    approx(B.mean(reg.y, axis=0), B.zeros(reg.p))
    approx(B.std(reg.y, axis=0), B.ones(reg.p))

    # Test that data is correctly normalised if it has an output with zero
    # variance.
    y_pathological = y.copy()
    y_pathological[:, 0] = 1
    reg.condition(x, y_pathological)
    assert (~B.isnan(reg.y)).numpy().all()

    # Test transformation and normalisation of outputs.
    z = torch.linspace(-1, 1, 10, dtype=torch.float64)
    z = B.stack(z, 2 * z, axis=1)
    allclose(reg._untransform_y(reg._transform_y(z)), z)
    allclose(reg._unnormalise_y(reg._normalise_y(z)), z)

    # Test that fitting runs without issues.
    vs = reg.vs.copy(detach=True)
    reg.fit(x, y, fix=False)
    reg.vs = vs
    reg.fit(x, y, fix=True)

    # TODO: Remove this once greedy search is implemented.
    with pytest.raises(NotImplementedError):
        reg.fit(x, y, greedy=True)
Example No. 22
    f1 = -np.sin(10 * np.pi * (x + 1)) / (2 * x + 1) - x**4
    f2 = np.cos(f1)**2 + np.sin(3 * x)
    f3 = f2 * f1**2 + 3 * x
    f = np.stack((f1, f2, f3), axis=0).T

    # Add noise and subsample.
    y = f + noise * np.random.randn(n, 3)
    x_obs, y_obs = x[::8], y[::8]

    # Fit and predict GPAR.
    model = GPARRegressor(
        scale=0.1,
        linear=True,
        linear_scale=10.0,
        nonlinear=True,
        nonlinear_scale=0.1,
        noise=0.1,
        impute=True,
        replace=False,
        normalise_y=False,
    )
    model.fit(x_obs, y_obs)
    means, lowers, uppers = model.predict(x,
                                          num_samples=200,
                                          credible_bounds=True,
                                          latent=True)

    # Fit and predict independent GPs: set `markov=0` in GPAR.
    igp = GPARRegressor(
        scale=0.1,
        linear=True,