Exemple #1
0
def test_glm_gaussian(make_gaus_data, make_random):
    """Fit Gaussian-likelihood GLMs and sanity-check their predictions.

    Two models are fitted on the training split of ``make_gaus_data``: one
    with a plain linear basis and one with a concatenated basis. Each must
    reach ``smse < 0.1`` on the held-out split. The second model is also used
    to exercise ``predict_cdf``, ``predict_logpdf`` and ``predict_interval``.
    """
    X, y, Xs, ys = make_gaus_data

    linear = LinearBasis(onescol=True)
    likelihood = Gaussian()

    # Linear basis only
    model = GeneralizedLinearModel(likelihood, linear,
                                   random_state=make_random)
    model.fit(X, y)
    Ey = model.predict(Xs)
    assert smse(ys, Ey) < 0.1

    # Concatenated basis (linear + random RBF + random Matern 5/2)
    ndims = X.shape[1]
    combined = (
        LinearBasis(onescol=True)
        + RandomRBF(nbases=20, Xdim=ndims)
        + RandomMatern52(nbases=20, Xdim=ndims)
    )

    model = GeneralizedLinearModel(likelihood, combined,
                                   random_state=make_random)
    model.fit(X, y)
    Ey = model.predict(Xs)
    assert smse(ys, Ey) < 0.1

    # The CDF evaluated at a very large target value should be ~1
    cdf, _, _ = model.predict_cdf(Xs, 1e5)
    assert np.allclose(cdf, 1.)

    # Log density evaluated at the predictive mean should not be tiny
    logpdf, _, _ = model.predict_logpdf(Xs, Ey)
    assert np.all(logpdf > -100)

    # The predictive mean has to lie inside the 0.9 interval
    lower, upper = model.predict_interval(Xs, 0.9)
    assert all(Ey <= upper)
    assert all(Ey >= lower)
Exemple #2
0
def test_glm_binomial(make_binom_data, make_random):
    """Fit a Binomial-likelihood GLM and check its predictive outputs.

    This exercises the logic rather than the model's ability to overfit, so
    the SMSE threshold is relaxed compared with the Gaussian case (this is a
    harder problem). Training and test sets are deliberately not split,
    because the latent function and the bounds are what is being checked.
    """
    X, y, p, n = make_binom_data
    expected = p * n

    ndims = X.shape[1]
    basis = (
        LinearBasis(onescol=True)
        + RandomRBF(nbases=20, Xdim=ndims)
        + RandomMatern52(nbases=20, Xdim=ndims)
    )
    likelihood = Binomial()
    largs = (n, )

    # Fit and predict on the same inputs
    model = GeneralizedLinearModel(likelihood, basis,
                                   random_state=make_random)
    model.fit(X, y, likelihood_args=largs)
    Ey = model.predict(X, likelihood_args=largs)

    assert smse(expected, Ey) < 1

    # The CDF evaluated at a very large target value should be ~1
    cdf, _, _ = model.predict_cdf(X, 1e5, likelihood_args=largs)
    assert np.allclose(cdf, 1.)

    # The predictive mean has to lie inside the 0.9 interval
    lower, upper = model.predict_interval(X, 0.9, likelihood_args=largs)
    assert all(Ey <= upper)
    assert all(Ey >= lower)
Exemple #3
0
def test_sklearn_clone(make_gaus_data):
    """Check that ``clone`` copies of both model types keep their settings.

    The clones are fitted; afterwards selected entries of ``get_params()``
    and the likelihood / variance parameter values are compared between the
    original (unfitted) estimators and their clones.
    """
    X, y, Xs, ys = make_gaus_data

    basis = LinearBasis(onescol=True)
    slm = StandardLinearModel(basis=basis)
    glm = GeneralizedLinearModel(
        likelihood=Gaussian(),
        basis=basis,
        maxiter=100,
    )

    slm_clone = clone(slm)
    glm_clone = clone(glm)

    slm_clone.fit(X, y)
    glm_clone.fit(X, y)

    # GLM constructor arguments that can be compared with ==
    for name in ('K', 'batch_size', 'maxiter', 'nsamples', 'random_state',
                 'updater'):
        assert glm.get_params()[name] == glm_clone.get_params()[name]

    # The likelihood is an object, so compare its parameter value by hand
    assert glm_clone.likelihood.params.value == glm.likelihood.params.value

    # SLM constructor arguments that can be compared with ==
    for name in ('maxiter', 'tol'):
        assert slm.get_params()[name] == slm_clone.get_params()[name]

    # The variance is an object, so compare its value by hand
    assert slm_clone.var.value == slm.var.value
Exemple #4
0
def test_glm_binomial(make_binom_data):
    """Fit a Binomial-likelihood GLM and check its predictive outputs.

    This exercises the logic rather than the model's ability to overfit, so
    the SMSE threshold is relaxed; this is a harder problem than the
    previous case. ``randstate`` is read from the enclosing module.
    """
    X, y, p, n = make_binom_data
    expected = p * n

    ndims = X.shape[1]
    basis = (
        LinearBasis(onescol=True)
        + RandomRBF(nbases=20, Xdim=ndims)
        + RandomMatern52(nbases=20, Xdim=ndims)
    )
    likelihood = Binomial()
    largs = (n,)

    # Fit and predict on the same inputs
    model = GeneralisedLinearModel(likelihood, basis, random_state=randstate)
    model.fit(X, y, likelihood_args=largs)
    Ey = model.predict(X, likelihood_args=largs)

    assert smse(expected, Ey) < 1

    # The CDF evaluated at a very large target value should be ~1
    cdf, _, _ = model.predict_cdf(1e5, X, likelihood_args=largs)
    assert np.allclose(cdf, 1.)

    # The predictive mean has to lie inside the 0.9 interval
    lower, upper = model.predict_interval(0.9, X, likelihood_args=largs)
    assert all(Ey <= upper)
    assert all(Ey >= lower)
Exemple #5
0
def test_regression(make_data):
    """Learn and predict with ``regression`` for two bases; require R^2 > 0.9.

    The first run uses a linear basis with no basis hyperparameters, the
    second a linear + random RBF basis with initial hyperparameters ``[1.]``.
    Predictions are made on the training inputs.
    """
    X, y, w = make_data

    # Linear basis only
    linear = LinearBasis(onescol=False)
    learned = regression.learn(X, y, linear, [])
    Ey, _, _ = regression.predict(X, linear, *learned)
    assert rsquare(Ey, y) > 0.9

    # Linear basis concatenated with a random RBF basis
    combined = LinearBasis(onescol=False) \
        + RandomRBF(nbases=10, Xdim=X.shape[1])
    learned = regression.learn(X, y, combined, [1.])
    Ey, _, _ = regression.predict(X, combined, *learned)
    assert rsquare(Ey, y) > 0.9
Exemple #6
0
def test_glm(make_data):
    """Learn and predict with ``glm`` for two bases; require R^2 > 0.9.

    A Gaussian likelihood with initial parameters ``[1.]`` is used for both
    runs. The first run uses a linear basis with no basis hyperparameters,
    the second a linear + random RBF basis with hyperparameters ``[1.]``.
    Predictions are made on the training inputs.
    """
    X, y, w = make_data

    likelihood_basis = LinearBasis(onescol=False)
    likelihood = Gaussian()

    # Linear basis only
    learned = glm.learn(X, y, likelihood, [1.], likelihood_basis, [])
    Ey, _, _, _ = glm.predict_meanvar(X, likelihood, likelihood_basis,
                                      *learned)
    assert rsquare(Ey, y) > 0.9

    # Linear basis concatenated with a random RBF basis
    combined = LinearBasis(onescol=False) \
        + RandomRBF(nbases=10, Xdim=X.shape[1])
    learned = glm.learn(X, y, likelihood, [1.], combined, [1.])
    Ey, _, _, _ = glm.predict_meanvar(X, likelihood, combined, *learned)
    assert rsquare(Ey, y) > 0.9
Exemple #7
0
def test_pipeline_slm(make_gaus_data):
    """Run a ``StandardLinearModel`` as the last step of a ``Pipeline``.

    The pipeline applies ``PCA`` and then the model; predictions on the
    held-out split must reach ``smse < 0.1``.
    """
    X, y, Xs, ys = make_gaus_data

    model = StandardLinearModel(LinearBasis(onescol=True))
    pipeline = Pipeline([('PCA', PCA()), ('SLM', model)])

    pipeline.fit(X, y)
    predicted = pipeline.predict(Xs)
    assert smse(ys, predicted) < 0.1
Exemple #8
0
def test_randomgridsearch_slm(make_gaus_data):
    """Smoke-test ``RandomizedSearchCV`` over the SLM ``var`` parameter.

    Only checks that the search fits, predicts, and returns one prediction
    per held-out target.
    """
    X, y, Xs, ys = make_gaus_data

    model = StandardLinearModel(LinearBasis(onescol=True))

    # Candidate variances 1/1, 1/2, ..., 1/5
    candidates = [Parameter(1.0 / v, Positive()) for v in range(1, 6)]
    search = RandomizedSearchCV(model, {'var': candidates}, n_jobs=-1,
                                n_iter=2)

    search.fit(X, y)
    predicted = search.predict(Xs)
    assert len(ys) == len(predicted)  # only checking that everything runs
Exemple #9
0
def test_slm(make_gaus_data):
    """Fit ``StandardLinearModel`` with two bases; require ``smse < 0.1``.

    The first model uses a linear basis, the second a concatenation of
    linear, random RBF and random Matern 5/2 bases. Both are scored on the
    held-out split.
    """
    X, y, Xs, ys = make_gaus_data

    # Linear basis only
    linear = LinearBasis(onescol=False)
    model = StandardLinearModel(linear)
    model.fit(X, y)
    predicted = model.predict(Xs)
    assert smse(ys, predicted) < 0.1

    # Concatenated basis
    ndims = X.shape[1]
    combined = (
        LinearBasis(onescol=False)
        + RandomRBF(nbases=10, Xdim=ndims)
        + RandomMatern52(nbases=10, Xdim=ndims)
    )
    model = StandardLinearModel(combined)
    model.fit(X, y)
    predicted = model.predict(Xs)
    assert smse(ys, predicted) < 0.1
Exemple #10
0
def test_gridsearch_slm(make_gaus_data):
    """Smoke-test ``GridSearchCV`` over the SLM ``var`` parameter.

    Only checks that the search fits, predicts, and returns one prediction
    per held-out target.
    """
    X, y, Xs, ys = make_gaus_data

    model = StandardLinearModel(LinearBasis(onescol=True))

    candidates = [Parameter(v, Positive()) for v in (1.0, 2.0)]
    search = GridSearchCV(model, {'var': candidates}, n_jobs=-1)

    search.fit(X, y)
    predicted = search.predict(Xs)
    assert len(ys) == len(predicted)  # only checking that everything runs
Exemple #11
0
    def _make_basis(self, X):
        """Build the model basis for ``X`` and store it on ``self.basis``.

        The basis class is looked up in ``basismap`` by ``self.kernel`` and
        concatenated with a ``LinearBasis``. When ``self.ard`` is truthy and
        ``X`` has more than one column, the initial length scale is expanded
        to one value per column.
        """
        ndims = X.shape[1]

        if self.ard and ndims > 1:
            # One length scale per input column
            initial_scale = np.ones(ndims) * self.lenscale
        else:
            initial_scale = self.lenscale

        kernel_basis = basismap[self.kernel](
            Xdim=ndims,
            nbases=self.nbases,
            lenscale=Parameter(initial_scale, Positive()),
            regularizer=self.regulariser,
        )

        self.basis = kernel_basis + LinearBasis()
Exemple #12
0
def test_pipeline_glm(make_gaus_data, make_random):
    """Run a ``GeneralizedLinearModel`` as the last step of a ``Pipeline``.

    The pipeline applies ``PCA`` and then the model; predictions on the
    held-out split must reach ``smse < 0.1``.
    """
    X, y, Xs, ys = make_gaus_data

    model = GeneralizedLinearModel(
        Gaussian(),
        LinearBasis(onescol=True),
        random_state=make_random,
    )
    pipeline = Pipeline([('PCA', PCA()), ('SLM', model)])

    pipeline.fit(X, y)
    predicted = pipeline.predict(Xs)
    assert smse(ys, predicted) < 0.1
Exemple #13
0
    def __init__(self,
                 onescol=True,
                 var=1.,
                 regulariser=1.,
                 tol=1e-8,
                 maxiter=1000,
                 nstarts=100):
        """Set up the parent model with a ``LinearBasis``.

        ``regulariser`` and ``var`` are each wrapped in a ``Positive``
        ``Parameter``; ``onescol`` is passed to ``LinearBasis``. ``tol``,
        ``maxiter`` and ``nstarts`` are forwarded unchanged to the parent
        constructor.
        """
        reg_param = Parameter(regulariser, Positive())
        linear = LinearBasis(onescol=onescol, regularizer=reg_param)
        var_param = Parameter(var, Positive())

        super().__init__(
            basis=linear,
            var=var_param,
            tol=tol,
            maxiter=maxiter,
            nstarts=nstarts,
        )
Exemple #14
0
def test_gridsearch_glm(make_gaus_data):
    """Smoke-test ``GridSearchCV`` over the GLM ``batch_size`` parameter.

    Only checks that the search fits, predicts, and returns one prediction
    per held-out target.
    """
    X, y, Xs, ys = make_gaus_data

    model = GeneralizedLinearModel(
        Gaussian(),
        LinearBasis(onescol=True),
        random_state=1,
        maxiter=100,
    )

    search = GridSearchCV(model, {'batch_size': [10, 20]}, verbose=1,
                          n_jobs=-1)

    search.fit(X, y)
    predicted = search.predict(Xs)
    assert len(ys) == len(predicted)  # only checking that everything runs
Exemple #15
0
 def __init__(self,
              onescol=True,
              var=1.,
              regulariser=1.,
              maxiter=3000,
              batch_size=10,
              alpha=0.01,
              beta1=0.9,
              beta2=0.99,
              epsilon=1e-8,
              random_state=None,
              nstarts=500):
     """Set up the parent model with a linear basis and Gaussian likelihood.

     ``regulariser`` and ``var`` are each wrapped in a ``Positive``
     ``Parameter``; ``onescol`` is passed to ``LinearBasis``. ``alpha``,
     ``beta1``, ``beta2`` and ``epsilon`` are passed positionally to
     ``Adam``, which becomes the ``updater``. The remaining arguments are
     forwarded unchanged to the parent constructor.
     """
     reg_param = Parameter(regulariser, Positive())
     linear = LinearBasis(onescol=onescol, regularizer=reg_param)
     gaussian = Gaussian(Parameter(var, Positive()))
     adam = Adam(alpha, beta1, beta2, epsilon)

     super().__init__(
         likelihood=gaussian,
         basis=linear,
         maxiter=maxiter,
         batch_size=batch_size,
         updater=adam,
         random_state=random_state,
         nstarts=nstarts,
     )
Exemple #16
0
 def get_basis(self, basis, regulariser):
     """Return ``basis``, building a ``LinearBasis`` when it is a bool.

     A bool ``basis`` is used as the ``onescol`` flag (whether to add a bias
     term) of a new ``LinearBasis`` whose regularizer comes from
     ``self.get_regularizer(regulariser)``. Any other value is returned
     untouched.
     """
     if not isinstance(basis, bool):
         return basis

     reg = self.get_regularizer(regulariser)
     return LinearBasis(onescol=basis, regularizer=reg)
Exemple #17
0
# Send INFO-level log messages to the terminal attached to this notebook
logging.basicConfig(level=logging.INFO)

# Load the Boston data and centre the targets on their mean
boston = load_boston()
X = boston.data
y = boston.target - boston.target.mean()

# Keep only the first of the shuffled folds as the train/test split
folds = 5
tr_ind, ts_ind = list(KFold(len(y), n_folds=folds, shuffle=True))[0]

# Basis: linear plus random Matern 3/2 with one length scale per column
N, D = X.shape
lenscale = 10.
nbases = 50
lenARD = np.ones(D) * lenscale
lenscale_init = Parameter(lenARD, Positive())
base = LinearBasis(onescol=True) + RandomMatern32(
    Xdim=D,
    nbases=nbases,
    lenscale_init=lenscale_init,
)

# Likelihood
like = Gaussian()

# Fit on the training fold, then predict moments on the test fold
glm = GeneralisedLinearModel(like, base, maxiter=6000)
glm.fit(X[tr_ind], y[tr_ind])
Ey, Vy = glm.predict_moments(X[ts_ind])

# Report both scores on the test fold
y_true = y[ts_ind]
print("SMSE = {}, MSLL = {}".format(
    smse(y_true, Ey),
    msll(y_true, Ey, Vy, y[tr_ind]),
))