# Assumed imports for the snippets below (module paths follow the revrand
# library; these functions are collected from several files, so adjust the
# imports to your layout as needed).
import numpy as np
from sklearn.base import clone
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.pipeline import Pipeline

from revrand import GeneralizedLinearModel, StandardLinearModel
from revrand.basis_functions import LinearBasis, RandomRBF, RandomMatern52
from revrand.btypes import Parameter, Positive
from revrand.likelihoods import Binomial, Gaussian
from revrand.metrics import smse
from revrand.optimize import Adam


def test_glm_gaussian(make_gaus_data, make_random):

    X, y, Xs, ys = make_gaus_data

    basis = LinearBasis(onescol=True)
    lhood = Gaussian()

    # simple SGD
    glm = GeneralizedLinearModel(lhood, basis, random_state=make_random)
    glm.fit(X, y)
    Ey = glm.predict(Xs)

    assert smse(ys, Ey) < 0.1

    # Test BasisCat
    basis = LinearBasis(onescol=True) \
        + RandomRBF(nbases=20, Xdim=X.shape[1]) \
        + RandomMatern52(nbases=20, Xdim=X.shape[1])

    glm = GeneralizedLinearModel(lhood, basis, random_state=make_random)
    glm.fit(X, y)
    Ey = glm.predict(Xs)

    assert smse(ys, Ey) < 0.1

    # Test upper quantile estimates
    py, _, _ = glm.predict_cdf(Xs, 1e5)
    assert np.allclose(py, 1.)

    # Test log probability
    lpy, _, _ = glm.predict_logpdf(Xs, Ey)
    assert np.all(lpy > -100)

    # Mean predictions should lie within the 90% predictive interval
    EyQn, EyQx = glm.predict_interval(Xs, 0.9)
    assert all(Ey <= EyQx)
    assert all(Ey >= EyQn)

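# The tests in this module rely on pytest fixtures (make_gaus_data,
# make_random, make_binom_data, make_data) defined elsewhere in the
# repository. A minimal sketch of what the Gaussian ones might look like;
# the names, shapes, and seeds here are assumptions, not the project's
# actual conftest.py:
import pytest


@pytest.fixture
def make_random():
    # A fixed seed so the stochastic (SGD) fits are repeatable across runs.
    return np.random.RandomState(10)


@pytest.fixture
def make_gaus_data():
    # Noisy draws from a linear function, split into train and test sets.
    rnd = np.random.RandomState(0)
    w = np.array([1.0, 2.0])
    X = rnd.randn(200, 2)
    y = X.dot(w) + 0.1 * rnd.randn(200)
    Xs = rnd.randn(50, 2)
    ys = Xs.dot(w)
    return X, y, Xs, ys
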
def test_glm_binomial(make_binom_data, make_random):
    # This is more to test the logic than to test whether the model can
    # overfit, hence the more relaxed SMSE: this is a harder problem than the
    # previous case. We also haven't split training and test sets, since we
    # want to check the latent function and bounds.
    X, y, p, n = make_binom_data
    f = p * n

    basis = LinearBasis(onescol=True) \
        + RandomRBF(nbases=20, Xdim=X.shape[1]) \
        + RandomMatern52(nbases=20, Xdim=X.shape[1])
    lhood = Binomial()
    largs = (n,)

    # SGD
    glm = GeneralizedLinearModel(lhood, basis, random_state=make_random)
    glm.fit(X, y, likelihood_args=largs)
    Ey = glm.predict(X, likelihood_args=largs)

    assert smse(f, Ey) < 1

    # Test upper quantile estimates
    py, _, _ = glm.predict_cdf(X, 1e5, likelihood_args=largs)
    assert np.allclose(py, 1.)

    EyQn, EyQx = glm.predict_interval(X, 0.9, likelihood_args=largs)
    assert all(Ey <= EyQx)
    assert all(Ey >= EyQn)

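# A corresponding sketch for the binomial fixture; only the return shape
# (X, successes y out of n trials, true probabilities p) is taken from the
# test above, and everything else is an assumption:
from scipy.stats import binom


@pytest.fixture
def make_binom_data():
    rnd = np.random.RandomState(1)
    n = 100  # trials per observation
    X = np.linspace(-2, 2, 150)[:, np.newaxis]
    p = 1. / (1. + np.exp(-X.ravel()))  # logistic latent probability
    y = binom.rvs(n, p, random_state=rnd)
    return X, y, p, n
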
def test_sklearn_clone(make_gaus_data):

    X, y, Xs, ys = make_gaus_data
    basis = LinearBasis(onescol=True)

    slm = StandardLinearModel(basis=basis)
    glm = GeneralizedLinearModel(likelihood=Gaussian(), basis=basis,
                                 maxiter=100)

    slm_clone = clone(slm)
    glm_clone = clone(glm)

    slm_clone.fit(X, y)
    glm_clone.fit(X, y)

    # scalar values
    glm_keys = [
        'K', 'batch_size', 'maxiter', 'nsamples', 'random_state', 'updater'
    ]
    for k in glm_keys:
        assert glm.get_params()[k] == glm_clone.get_params()[k]

    # Manually test likelihood objects
    assert glm_clone.likelihood.params.value == glm.likelihood.params.value

    # scalar values
    slm_keys = ['maxiter', 'tol']
    for k in slm_keys:
        assert slm.get_params()[k] == slm_clone.get_params()[k]

    # Manually test variance objects
    assert slm_clone.var.value == slm.var.value

# NOTE: this test and the next exercise revrand's older functional API
# (module-level learn/predict functions) rather than the estimator classes
# used above.
def test_regression(make_data):

    X, y, w = make_data

    basis = LinearBasis(onescol=False)
    params = regression.learn(X, y, basis, [])
    Ey, Vf, Vy = regression.predict(X, basis, *params)

    assert rsquare(Ey, y) > 0.9

    basis = LinearBasis(onescol=False) + RandomRBF(nbases=10, Xdim=X.shape[1])
    params = regression.learn(X, y, basis, [1.])
    Ey, Vf, Vy = regression.predict(X, basis, *params)

    assert rsquare(Ey, y) > 0.9

def test_glm(make_data):

    X, y, w = make_data

    basis = LinearBasis(onescol=False)
    lhood = Gaussian()
    params = glm.learn(X, y, lhood, [1.], basis, [])
    Ey, _, _, _ = glm.predict_meanvar(X, lhood, basis, *params)

    assert rsquare(Ey, y) > 0.9

    basis = LinearBasis(onescol=False) + RandomRBF(nbases=10, Xdim=X.shape[1])
    params = glm.learn(X, y, lhood, [1.], basis, [1.])
    Ey, _, _, _ = glm.predict_meanvar(X, lhood, basis, *params)

    assert rsquare(Ey, y) > 0.9

def test_pipeline_slm(make_gaus_data):

    X, y, Xs, ys = make_gaus_data

    slm = StandardLinearModel(LinearBasis(onescol=True))
    estimators = [('PCA', PCA()), ('SLM', slm)]
    pipe = Pipeline(estimators)

    pipe.fit(X, y)
    Ey = pipe.predict(Xs)

    assert smse(ys, Ey) < 0.1

def test_randomgridsearch_slm(make_gaus_data):

    X, y, Xs, ys = make_gaus_data

    slm = StandardLinearModel(LinearBasis(onescol=True))
    param_dict = {'var': [Parameter(1.0 / v, Positive()) for v in range(1, 6)]}
    estimator = RandomizedSearchCV(slm, param_dict, n_jobs=-1, n_iter=2)

    estimator.fit(X, y)
    Ey = estimator.predict(Xs)

    assert len(ys) == len(Ey)  # we just want to make sure this all runs

def test_slm(make_gaus_data):

    X, y, Xs, ys = make_gaus_data

    basis = LinearBasis(onescol=False)
    slm = StandardLinearModel(basis)

    slm.fit(X, y)
    Ey = slm.predict(Xs)

    assert smse(ys, Ey) < 0.1

    basis = LinearBasis(onescol=False) \
        + RandomRBF(nbases=10, Xdim=X.shape[1]) \
        + RandomMatern52(nbases=10, Xdim=X.shape[1])
    slm = StandardLinearModel(basis)

    slm.fit(X, y)
    Ey = slm.predict(Xs)

    assert smse(ys, Ey) < 0.1

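# `smse` used throughout is revrand's standardised mean squared error: the MSE
# normalised by the variance of the true targets, so a constant mean predictor
# scores about 1 and values near 0 indicate a good fit. A sketch of that
# definition (not the library's exact implementation):
def smse_sketch(y_true, y_pred):
    return np.mean((y_true - y_pred) ** 2) / np.var(y_true)
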
def test_gridsearch_slm(make_gaus_data):

    X, y, Xs, ys = make_gaus_data

    slm = StandardLinearModel(LinearBasis(onescol=True))
    param_dict = {'var': [Parameter(v, Positive()) for v in [1.0, 2.0]]}
    estimator = GridSearchCV(slm, param_dict, n_jobs=-1)

    estimator.fit(X, y)
    Ey = estimator.predict(Xs)

    assert len(ys) == len(Ey)  # we just want to make sure this all runs

def _make_basis(self, X):

    D = X.shape[1]
    lenscale = self.lenscale
    # For ARD (automatic relevance determination), expand the scalar length
    # scale to one length scale per input dimension.
    if self.ard and D > 1:
        lenscale = np.ones(D) * lenscale
    lenscale_init = Parameter(lenscale, Positive())
    gpbasis = basismap[self.kernel](Xdim=D, nbases=self.nbases,
                                    lenscale=lenscale_init,
                                    regularizer=self.regulariser)

    # Concatenate a linear basis so the model can capture linear trends on
    # top of the kernel approximation.
    self.basis = gpbasis + LinearBasis()

def test_pipeline_glm(make_gaus_data, make_random):

    X, y, Xs, ys = make_gaus_data

    glm = GeneralizedLinearModel(Gaussian(),
                                 LinearBasis(onescol=True),
                                 random_state=make_random)
    estimators = [('PCA', PCA()), ('GLM', glm)]
    pipe = Pipeline(estimators)

    pipe.fit(X, y)
    Ey = pipe.predict(Xs)

    assert smse(ys, Ey) < 0.1

def __init__(self, onescol=True, var=1., regulariser=1., tol=1e-8,
             maxiter=1000, nstarts=100):

    basis = LinearBasis(onescol=onescol,
                        regularizer=Parameter(regulariser, Positive()))
    super().__init__(basis=basis,
                     var=Parameter(var, Positive()),
                     tol=tol,
                     maxiter=maxiter,
                     nstarts=nstarts)

def test_gridsearch_glm(make_gaus_data):

    X, y, Xs, ys = make_gaus_data

    glm = GeneralizedLinearModel(Gaussian(), LinearBasis(onescol=True),
                                 random_state=1, maxiter=100)
    param_dict = {'batch_size': [10, 20]}
    estimator = GridSearchCV(glm, param_dict, verbose=1, n_jobs=-1)

    estimator.fit(X, y)
    Ey = estimator.predict(Xs)

    assert len(ys) == len(Ey)  # we just want to make sure this all runs

def __init__(self, onescol=True, var=1., regulariser=1., maxiter=3000,
             batch_size=10, alpha=0.01, beta1=0.9, beta2=0.99, epsilon=1e-8,
             random_state=None, nstarts=500):

    basis = LinearBasis(onescol=onescol,
                        regularizer=Parameter(regulariser, Positive()))
    super().__init__(likelihood=Gaussian(Parameter(var, Positive())),
                     basis=basis,
                     maxiter=maxiter,
                     batch_size=batch_size,
                     updater=Adam(alpha, beta1, beta2, epsilon),
                     random_state=random_state,
                     nstarts=nstarts)

def get_basis(self, basis, regulariser):
    # `basis` may be given as a bool, in which case it simply toggles whether
    # a bias (ones) column is added via a LinearBasis.
    if isinstance(basis, bool):
        regulariser = self.get_regularizer(regulariser)
        basis = LinearBasis(onescol=basis, regularizer=regulariser)
    return basis

import logging

import numpy as np
from sklearn.datasets import load_boston
from sklearn.model_selection import KFold

from revrand import GeneralizedLinearModel
from revrand.basis_functions import LinearBasis, RandomMatern32
from revrand.btypes import Parameter, Positive
from revrand.likelihoods import Gaussian
from revrand.metrics import msll, smse

# Log output to the terminal attached to this notebook
logging.basicConfig(level=logging.INFO)

# Load the data
boston = load_boston()
X = boston.data
y = boston.target - boston.target.mean()

folds = 5
tr_ind, ts_ind = next(KFold(n_splits=folds, shuffle=True).split(X))

# Make basis and likelihood
N, D = X.shape
lenscale = 10.
nbases = 50

lenARD = lenscale * np.ones(D)
lenscale_init = Parameter(lenARD, Positive())
base = LinearBasis(onescol=True) \
    + RandomMatern32(Xdim=D, nbases=nbases, lenscale=lenscale_init)
like = Gaussian()

# Fit and predict the model
glm = GeneralizedLinearModel(like, base, maxiter=6000)
glm.fit(X[tr_ind], y[tr_ind])
Ey, Vy = glm.predict_moments(X[ts_ind])

# Score
y_true = y[ts_ind]
print("SMSE = {}, MSLL = {}".format(smse(y_true, Ey),
                                    msll(y_true, Ey, Vy, y[tr_ind])))

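# `msll` above is the mean standardised log loss (Rasmussen & Williams, GPML
# ch. 2): the average negative log predictive density of the test targets
# under the model, minus that of a naive Gaussian fitted to the training
# targets. Negative values mean the model beats that baseline. A sketch of
# the definition (not revrand's exact implementation):
def msll_sketch(y_true, Ey, Vy, y_train):
    model_nll = 0.5 * np.log(2 * np.pi * Vy) + (y_true - Ey) ** 2 / (2 * Vy)
    mu, var = y_train.mean(), y_train.var()
    naive_nll = 0.5 * np.log(2 * np.pi * var) + (y_true - mu) ** 2 / (2 * var)
    return np.mean(model_nll - naive_nll)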