class BayesianFourierRegressor:
    """Bayesian linear regression on random Fourier features.

    Inputs are lifted through a `FourierFeatures` basis and a
    `LinearGaussianWithMatrixNormalWishart` model is fit on those
    features via mean-field variational updates.

    Parameters
    ----------
    sizes : sequence of int
        Only the first (input dim), second (feature dim) and last
        (target dim) entries are read here.
    bandwidth :
        Bandwidth handed to the Fourier feature basis.
    prior : MatrixNormalWishart, optional
        Prior over the regression weights; a weakly informative default
        is constructed when omitted.
    """

    def __init__(self, sizes, bandwidth, prior=None):
        super(BayesianFourierRegressor, self).__init__()

        self.bandwidth = bandwidth
        self.sizes = sizes

        # layout of `sizes`: input dim first, feature dim second, target dim last
        self.target_size = self.sizes[-1]
        self.input_size = self.sizes[0]
        self.hidden_size = self.sizes[1]

        self.basis = FourierFeatures(self.sizes, self.bandwidth)

        if prior is None:
            # A standard relatively uninformative prior.
            # hidden_size + 1 columns: presumably one extra column for the
            # bias added by affine=True below — TODO confirm in mimo.
            hypparams = dict(M=np.zeros((self.target_size, self.hidden_size + 1)),
                             K=1e-2 * np.eye(self.hidden_size + 1),
                             psi=np.eye(self.target_size),
                             nu=self.target_size + 1)
            prior = MatrixNormalWishart(**hypparams)

        self.model = LinearGaussianWithMatrixNormalWishart(prior, affine=True)

        # input/target whitening transforms, set by init_preprocess()
        self.input_trans = None
        self.target_trans = None

    def features(self, input):
        # project inputs into the random Fourier feature space
        return self.basis.fit_transform(input)

    def predict(self, input):
        """Posterior-predictive output for `input`, on the original (unwhitened) scale."""
        input = transform(input.reshape((-1, self.input_size)), self.input_trans)
        feat = self.features(input)
        output, _, _ = self.model.posterior_predictive_gaussian(np.squeeze(feat))
        # undo the target whitening applied during fit
        output = inverse_transform(output, self.target_trans).squeeze()
        return output

    def init_preprocess(self, target, input):
        # fit standardizers on the raw training data
        self.target_trans = StandardScaler()
        self.input_trans = StandardScaler()
        self.target_trans.fit(target)
        self.input_trans.fit(input)

    @ensure_args_atleast_2d
    def fit(self, target, input, preprocess=True, nb_iter=3):
        """Fit the model with `nb_iter` rounds of empirical-Bayes updates."""
        if preprocess:
            # NOTE(review): the whitening calls are reconstructed as part of
            # the `preprocess` branch — confirm against the original layout.
            self.init_preprocess(target, input)
            target = transform(target, self.target_trans)
            input = transform(input, self.input_trans)

        feat = self.features(input)
        for _ in range(nb_iter):
            # do empirical bayes: refit, then promote the posterior to prior
            self.model.meanfield_update(y=target, x=feat)
            self.model.prior = self.model.posterior
def __init__(self, sizes, bandwidth, prior=None):
    """Set up the Fourier basis and the Bayesian linear-Gaussian model.

    Parameters
    ----------
    sizes : sequence of int
        Only the first (input dim), second (feature dim) and last
        (target dim) entries are read here.
    bandwidth :
        Bandwidth handed to the Fourier feature basis.
    prior : MatrixNormalWishart, optional
        Prior over the regression weights; a weakly informative default
        is constructed when omitted.
    """
    super(BayesianFourierRegressor, self).__init__()

    self.bandwidth = bandwidth
    self.sizes = sizes

    # layout of `sizes`: input dim first, feature dim second, target dim last
    self.target_size = self.sizes[-1]
    self.input_size = self.sizes[0]
    self.hidden_size = self.sizes[1]

    self.basis = FourierFeatures(self.sizes, self.bandwidth)

    if prior is None:
        # A standard relatively uninformative prior.
        # hidden_size + 1 columns: presumably one extra column for the
        # bias added by affine=True below — TODO confirm in mimo.
        hypparams = dict(M=np.zeros((self.target_size, self.hidden_size + 1)),
                         K=1e-2 * np.eye(self.hidden_size + 1),
                         psi=np.eye(self.target_size),
                         nu=self.target_size + 1)
        prior = MatrixNormalWishart(**hypparams)

    self.model = LinearGaussianWithMatrixNormalWishart(prior, affine=True)

    # input/target whitening transforms, set elsewhere before use
    self.input_trans = None
    self.target_trans = None
def learn(self, data):
    """Fit a per-time-step Bayesian linear model via mean-field updates.

    For each step t, stacks states and actions into the regressors,
    updates a fresh `LinearGaussianWithMatrixNormalWishart` on the
    next-state targets, and stores the posterior mean/covariance of the
    (vectorized) dynamics parameters in `self.mu` / `self.sigma`.

    Parameters
    ----------
    data : dict
        Arrays keyed by 'x', 'u', 'xn', indexed as [:, t, :]
        (presumably rollouts x steps x dim — TODO confirm with caller).

    Returns
    -------
    noise : np.ndarray, shape (dm_state, dm_state, nb_steps)
        Per-step observation-noise covariance (inverse Wishart mode).
    """
    noise = np.zeros((self.dm_state, self.dm_state, self.nb_steps))
    for t in range(self.nb_steps):
        # avoid shadowing the builtin `input`
        xu = np.hstack((data['x'][:, t, :].T, data['u'][:, t, :].T))
        xn = data['xn'][:, t, :].T

        model = LinearGaussianWithMatrixNormalWishart(self.prior, affine=True)
        model = model.meanfield_update(y=xn, x=xu)

        # hoist: the Wishart posterior mode and its inverse were previously
        # recomputed twice per step
        lmbda = model.posterior.wishart.mode()
        lmbda_inv = np.linalg.inv(lmbda)

        # column-major reshape matches the matrix-normal vectorization
        self.mu[..., t] = np.reshape(model.posterior.matnorm.M,
                                     self.mu[..., t].shape, order='F')
        self.sigma[..., t] = np.linalg.inv(np.kron(model.posterior.matnorm.K, lmbda))
        noise[..., t] = lmbda_inv
    return noise
def learn(self, data):
    """MAP-fit time-varying linear dynamics xn ~ A x + B u + c.

    For every step, a fresh `LinearGaussianWithMatrixNormalWishart` is
    MAP-estimated on the stacked state-action regressors; the resulting
    likelihood matrix is sliced into the per-step `A`, `B`, `c` blocks
    and the noise covariance into `sigma`.
    """
    for step in range(self.nb_steps):
        # stacked regressors: states followed by actions
        xu = np.hstack((data['x'][:, step, :].T, data['u'][:, step, :].T))
        xn = data['xn'][:, step, :].T

        lgm = LinearGaussianWithMatrixNormalWishart(self.prior, affine=True)
        lgm = lgm.max_aposteriori(y=xn, x=xu)

        # slice the MAP transfer matrix into state, action and offset parts
        mat = lgm.likelihood.A
        self.A[..., step] = mat[:, :self.dm_state]
        self.B[..., step] = mat[:, self.dm_state:self.dm_state + self.dm_act]
        self.c[..., step] = mat[:, -1]
        self.sigma[..., step] = lgm.likelihood.sigma
def _job(kwargs):
    """Train one Bayesian mixture of linear Gaussians (worker job).

    Pops the training setup out of `kwargs`, builds per-component
    Normal-Wishart basis priors and Matrix-Normal-Wishart model priors,
    assembles the mixture with either a stick-breaking or a Dirichlet
    gating (selected by `args.prior`), then runs Gibbs sampling followed
    by rounds of (stochastic and/or coordinate) mean-field VI.

    Returns the trained `BayesianMixtureOfLinearGaussians`.
    """
    args = kwargs.pop('arguments')
    seed = kwargs.pop('seed')

    input = kwargs.pop('train_input')
    target = kwargs.pop('train_target')

    input_dim = input.shape[-1]
    target_dim = target.shape[-1]

    # set random seed
    np.random.seed(seed)

    # one extra regression parameter for the bias when affine
    nb_params = input_dim
    if args.affine:
        nb_params += 1

    basis_prior = []
    models_prior = []

    # initialize Normal
    psi_nw = 1e0
    kappa = 1e-2

    # initialize Matrix-Normal
    psi_mnw = 1e0
    K = 1e-3

    # identical priors for every mixture component
    for n in range(args.nb_models):
        basis_hypparams = dict(mu=np.zeros((input_dim, )),
                               psi=np.eye(input_dim) * psi_nw,
                               kappa=kappa, nu=input_dim + 1)
        aux = NormalWishart(**basis_hypparams)
        basis_prior.append(aux)

        models_hypparams = dict(M=np.zeros((target_dim, nb_params)),
                                K=K * np.eye(nb_params), nu=target_dim + 1,
                                psi=np.eye(target_dim) * psi_mnw)
        aux = MatrixNormalWishart(**models_hypparams)
        models_prior.append(aux)

    # define gating
    if args.prior == 'stick-breaking':
        gating_hypparams = dict(K=args.nb_models,
                                gammas=np.ones((args.nb_models, )),
                                deltas=np.ones((args.nb_models, )) * args.alpha)
        gating_prior = TruncatedStickBreaking(**gating_hypparams)

        ilr = BayesianMixtureOfLinearGaussians(
            gating=CategoricalWithStickBreaking(gating_prior),
            basis=[GaussianWithNormalWishart(basis_prior[i])
                   for i in range(args.nb_models)],
            models=[LinearGaussianWithMatrixNormalWishart(models_prior[i],
                                                          affine=args.affine)
                    for i in range(args.nb_models)])
    else:
        gating_hypparams = dict(K=args.nb_models,
                                alphas=np.ones((args.nb_models, )) * args.alpha)
        gating_prior = Dirichlet(**gating_hypparams)

        ilr = BayesianMixtureOfLinearGaussians(
            gating=CategoricalWithDirichlet(gating_prior),
            basis=[GaussianWithNormalWishart(basis_prior[i])
                   for i in range(args.nb_models)],
            models=[LinearGaussianWithMatrixNormalWishart(models_prior[i],
                                                          affine=args.affine)
                    for i in range(args.nb_models)])

    ilr.add_data(target, input, whiten=True)

    # Gibbs sampling
    ilr.resample(maxiter=args.gibbs_iters, progprint=args.verbose)

    for _ in range(args.super_iters):
        if args.stochastic:
            # Stochastic meanfield VI
            ilr.meanfield_stochastic_descent(maxiter=args.svi_iters,
                                             stepsize=args.svi_stepsize,
                                             batchsize=args.svi_batchsize)
        if args.deterministic:
            # Meanfield VI
            ilr.meanfield_coordinate_descent(tol=args.earlystop,
                                             maxiter=args.meanfield_iters,
                                             progprint=args.verbose)

        # empirical Bayes: promote each posterior to the next round's prior
        # NOTE(review): loop extent reconstructed from a collapsed source —
        # confirm these updates sit inside the super_iters loop.
        ilr.gating.prior = ilr.gating.posterior
        for i in range(ilr.likelihood.size):
            ilr.basis[i].prior = ilr.basis[i].posterior
            ilr.models[i].prior = ilr.models[i].posterior

    return ilr
aux = MatrixNormalWishart(**models_hypparams) models_prior.append(aux) gating_hypparams = dict(K=args.nb_models, gammas=np.ones((args.nb_models, )), deltas=np.ones((args.nb_models, )) * args.alpha) gating_prior = TruncatedStickBreaking(**gating_hypparams) ilr = BayesianMixtureOfLinearGaussians( gating=CategoricalWithStickBreaking(gating_prior), basis=[ GaussianWithNormalWishart(basis_prior[i]) for i in range(args.nb_models) ], models=[ LinearGaussianWithMatrixNormalWishart(models_prior[i], affine=args.affine) for i in range(args.nb_models) ]) import copy from sklearn.utils import shuffle from sklearn.metrics import mean_squared_error, r2_score anim = [] split_size = int(nb_train / args.nb_splits) mse = np.zeros((args.nb_splits, )) smse = np.zeros((args.nb_splits, )) nb_models = np.zeros((args.nb_splits, ), dtype=np.int64)
from mimo.distributions import LinearGaussianWithMatrixNormalWishart # npr.seed(1337) dcol = 50 drow = 1 A = 1. * npr.randn(drow, dcol) nb_samples = 200 nb_datasets = 10 dist = LinearGaussianWithPrecision(A=A, lmbda=100 * np.eye(drow), affine=False) x = [npr.randn(nb_samples, dcol) for _ in range(nb_datasets)] y = [dist.rvs(_x) for _x in x] print("True transf." + "\n", dist.A, "\n" + "True sigma" + "\n", dist.sigma) affine = False nb_params = dcol + 1 if affine else dcol hypparams = dict(M=np.zeros((drow, nb_params)), K=1e-2 * np.eye(nb_params), psi=np.eye(drow), nu=drow + 1) prior = MatrixNormalWishart(**hypparams) model = LinearGaussianWithMatrixNormalWishart(prior, affine=False) model.meanfield_update(y=y, x=x) print("VI transf." + "\n", model.likelihood.A, "\n" + "VI covariance" + "\n", model.likelihood.sigma)
from mimo.distributions import LinearGaussianWithMatrixNormalWishart

npr.seed(1337)

# ground-truth linear map: drow outputs from dcol inputs
dcol = 50
drow = 1

A = 1. * npr.randn(drow, dcol)

nb_samples = 200
nb_datasets = 10

# true linear-Gaussian generator with high output precision (lmbda = 100 I)
dist = LinearGaussianWithPrecision(A=A, lmbda=100 * np.eye(drow),
                                   affine=False)

# several independent datasets drawn from the same true model
x = [npr.randn(nb_samples, dcol) for _ in range(nb_datasets)]
y = [dist.rvs(_x) for _x in x]

print("True transf." + "\n", dist.A,
      "\n" + "True sigma" + "\n", dist.sigma)

affine = False
nb_params = dcol + 1 if affine else dcol

# weakly informative Matrix-Normal-Wishart prior on the weights
hypparams = dict(M=np.zeros((drow, nb_params)),
                 K=1e-2 * np.eye(nb_params),
                 psi=np.eye(drow),
                 nu=drow + 1)
prior = MatrixNormalWishart(**hypparams)

model = LinearGaussianWithMatrixNormalWishart(prior, affine=False)
# MAP estimate and comparison against the true parameters
model.max_aposteriori(y=y, x=x)
print("MAP transf." + "\n", model.likelihood.A,
      "\n" + "MAP covariance" + "\n", model.likelihood.sigma)
# give the relevant features a strong non-zero weight
# (w, X, relevant_features, lambda_, nb_features, nb_samples are defined
# earlier in this script, outside the visible chunk)
for i in relevant_features:
    w[i] = stats.norm.rvs(loc=10., scale=1. / np.sqrt(lambda_))

# observation noise with precision alpha_
alpha_ = 10.
noise = stats.norm.rvs(loc=0., scale=1. / np.sqrt(alpha_), size=nb_samples)

y = np.dot(X, w) + noise
y = y.reshape(-1, 1)

# shared weakly informative Matrix-Normal-Wishart prior
hypparams = dict(M=np.zeros((1, nb_features)),
                 K=1e-2 * np.eye(nb_features),
                 psi=np.eye(1), nu=2)
prior = MatrixNormalWishart(**hypparams)

# standard Bayesian linear regression, fit by mean-field VI
std = LinearGaussianWithMatrixNormalWishart(prior, affine=False)
# std.resample(y=y, x=X)
std.meanfield_update(y, X)
print("STD transf."+"\n", std.posterior.matnorm.mean(),
      "\n"+"STD precision"+"\n", std.posterior.wishart.mean())

# Gamma hyper-prior over per-feature relevance (ARD)
hyphypparams = dict(alphas=1. * np.ones(nb_features),
                    betas=1. / (2. * 1e2) * np.ones(nb_features))
hypprior = Gamma(**hyphypparams)

# same regression with automatic relevance determination, for comparison
ard = LinearGaussianWithMatrixNormalWishartAndAutomaticRelevance(prior, hypprior,
                                                                 affine=False)
# ard.resample(y=y, x=X)
ard.meanfield_update(y, X)
print("ARD transf."+"\n", ard.posterior.matnorm.mean(),
      "\n"+"ARD precision"+"\n", ard.posterior.wishart.mean())
from mimo.distributions import LinearGaussianWithMatrixNormalWishart

npr.seed(1337)

# ground-truth linear map: drow outputs from dcol inputs
dcol = 50
drow = 1

A = 1. * npr.randn(drow, dcol)

nb_samples = 200
nb_datasets = 10

# true linear-Gaussian generator with high output precision (lmbda = 100 I)
dist = LinearGaussianWithPrecision(A=A, lmbda=100 * np.eye(drow),
                                   affine=False)

# several independent datasets drawn from the same true model
x = [npr.randn(nb_samples, dcol) for _ in range(nb_datasets)]
y = [dist.rvs(_x) for _x in x]

print("True transf." + "\n", dist.A,
      "\n" + "True sigma" + "\n", dist.sigma)

affine = False
nb_params = dcol + 1 if affine else dcol

# weakly informative Matrix-Normal-Wishart prior on the weights
hypparams = dict(M=np.zeros((drow, nb_params)),
                 K=1e-2 * np.eye(nb_params),
                 psi=np.eye(drow),
                 nu=drow + 1)
prior = MatrixNormalWishart(**hypparams)

model = LinearGaussianWithMatrixNormalWishart(prior, affine=False)
# posterior sampling via Gibbs and comparison against the truth
model.resample(y=y, x=x)
print("Gibbs transf." + "\n", model.likelihood.A,
      "\n" + "Gibbs covariance" + "\n", model.likelihood.sigma)