def construct_model(vs):
    kernels = [
        vs.pos(1, name=f"{i}/var") * Matern12().stretch(vs.pos(0.1, name=f"{i}/scale"))
        for i in range(m)
    ]
    noise = vs.pos(1e-2, name="noise")
    latent_noises = vs.pos(1e-2 * B.ones(m), name="latent_noises")
    h = Dense(vs.get(shape=(p, m), name="h"))
    return ILMMPP(kernels, h, noise, latent_noises)


def objective(vs):
    return -construct_model(vs).logpdf(torch.tensor(x), torch.tensor(y_norm))


minimise_l_bfgs_b(objective, vs, trace=True, iters=1000)

# Predict.
model = construct_model(vs)
model = model.condition(torch.tensor(x), torch.tensor(y_norm))
means, lowers, uppers = B.to_numpy(model.predict(torch.tensor(x)))

# Undo normalisation.
means, lowers, uppers = normaliser.unnormalise(means, lowers, uppers)

# For the purpose of comparison, standardise using the mean of the
# *training* data. This is not how the SMSE is usually defined!
pred = pd.DataFrame(means, index=train.index, columns=train.columns)
smse = ((pred - test) ** 2).mean(axis=0) / (
    (train.mean(axis=0) - test) ** 2
).mean(axis=0)
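
# A minimal, self-contained sketch of the SMSE variant computed above: the
# squared prediction error is divided by the squared error of a baseline that
# predicts the *training* mean for every test point. The toy numbers below
# are made up purely for illustration.
import pandas as pd

toy_train = pd.DataFrame({"out1": [0.0, 1.0, 2.0], "out2": [1.0, 1.0, 3.0]})
toy_test = pd.DataFrame({"out1": [1.5, 2.5], "out2": [2.0, 4.0]})
toy_pred = pd.DataFrame({"out1": [1.4, 2.6], "out2": [2.2, 3.7]})

toy_smse = ((toy_pred - toy_test) ** 2).mean(axis=0) / (
    (toy_train.mean(axis=0) - toy_test) ** 2
).mean(axis=0)
print(toy_smse)  # One value per output; below 1 beats the training-mean baseline.
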
def fit(self, x, y, w=None, greedy=False, fix=True, **kw_args):
    """Fit the model to data.

    Further takes in keyword arguments for `Varz.minimise_l_bfgs_b`.

    Args:
        x (tensor): Inputs of training data.
        y (tensor): Outputs of training data.
        w (tensor, optional): Weights of training data.
        greedy (bool, optional): Greedily determine the ordering of the
            outputs. Defaults to `False`.
        fix (bool, optional): Fix the parameters of a layer after training
            it. If set to `False`, the likelihoods are accumulated and all
            parameters are optimised at every step. Defaults to `True`.
    """
    # Condition the model before fitting.
    self.condition(x, y, w)

    if greedy:
        raise NotImplementedError('Greedy search is not implemented yet.')

    # Precompute the results of `per_output`. This can otherwise incur a
    # significant overhead if the number of outputs is large.
    y_cached = {
        k: list(per_output(self.y, self.w, keep=k)) for k in [True, False]
    }

    # Fit layer by layer.
    # Note: `_construct_gpar` takes in the *number* of outputs.
    with Counter(name='Training conditionals', total=self.p) as counter:
        for pi in range(self.p):
            counter.count()

            # If we fix the parameters of previous layers, we can
            # precompute the inputs. This speeds up the optimisation
            # massively.
            if fix:
                gpar = _construct_gpar(self, self.vs, self.m, pi + 1)
                fixed_x, fixed_x_ind = gpar.logpdf(self.x,
                                                   y_cached,
                                                   None,
                                                   only_last_layer=True,
                                                   outputs=list(range(pi)),
                                                   return_inputs=True)

            def objective(vs):
                gpar = _construct_gpar(self, vs, self.m, pi + 1)
                # If the parameters of the previous layers are fixed, use
                # the precomputed inputs.
                if fix:
                    return -gpar.logpdf(fixed_x,
                                        y_cached,
                                        None,
                                        only_last_layer=True,
                                        outputs=[pi],
                                        x_ind=fixed_x_ind)
                else:
                    return -gpar.logpdf(self.x,
                                        y_cached,
                                        None,
                                        only_last_layer=False)

            # Determine names to optimise.
            if fix:
                names = ['{}/*'.format(pi)]
            else:
                names = ['{}/*'.format(i) for i in range(pi + 1)]

            # Perform the optimisation.
            minimise_l_bfgs_b(objective, self.vs, names=names, **kw_args)
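
# A minimal sketch of the per-layer name filtering used in `fit` above:
# variables are named per output/layer ("0/...", "1/...", ...), so passing
# names=["1/*"] to varz's L-BFGS-B routine optimises only layer 1 while the
# variables of earlier layers stay fixed. The import path and toy objective
# below are assumptions for illustration, not part of the model code.
import torch
from varz import Vars
from varz.torch import minimise_l_bfgs_b

toy_vs = Vars(torch.float64)
toy_vs.pos(1.0, name="0/scale")
toy_vs.pos(1.0, name="1/scale")


def toy_objective(vs_):
    # Quadratic toy objective with optimum at 0/scale = 2 and 1/scale = 3.
    return (vs_["0/scale"] - 2) ** 2 + (vs_["1/scale"] - 3) ** 2


# Only variables matching "1/*" are optimised; "0/scale" keeps its value.
minimise_l_bfgs_b(toy_objective, toy_vs, names=["1/*"], iters=50)
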
# Convert to PyTorch.
locs = torch.tensor(locs)
x_pred = torch.tensor(x_pred)
x_data = torch.tensor(x_data)
y_data_norm = torch.tensor(y_data_norm)

# Model parameters:
n = data.shape[0]  # Number of data points
p = data.shape[1]  # Number of outputs
m = 10  # Number of latent processes

# Learn.
vs = Vars(torch.float64)
minimise_l_bfgs_b(lambda vs_: objective(vs_, m, x_data, y_data_norm, locs),
                  vs=vs,
                  trace=True,
                  iters=200)
wbml.out.kv('Learned spatial scales', vs['scales'])

# Predict.
lat_preds, obs_preds = predict(vs, m, x_data, y_data_norm, locs, x_pred)

# Convert to NumPy and undo normalisation.
obs_preds = [
    tuple(x * data_scale[0, i] + data_mean[0, i] for x in B.to_numpy(tup))
    for i, tup in enumerate(obs_preds)
]

# Plot first four latent processes.
plt.figure(figsize=(15, 5))
y_proj, _, S, _ = B.to_numpy(project(vs, m, y_data_norm, locs))
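
# A minimal sketch (made-up numbers) of the unnormalisation applied above:
# with data normalised as y_norm = (y - mean) / scale, every predicted
# quantity is mapped back through x * scale + mean.
import numpy as np

toy_mean, toy_scale = 10.0, 2.0
toy_y = np.array([8.0, 10.0, 14.0])
toy_y_norm = (toy_y - toy_mean) / toy_scale
assert np.allclose(toy_y_norm * toy_scale + toy_mean, toy_y)
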
    u, s, _ = B.svd(B.dense(k(loc)))
    u_r = Dense(u[:, :m_r])
    s_sqrt_r = Diagonal(B.sqrt(s[:m_r]))

    # Compose.
    s_sqrt = Kronecker(s_sqrt_s, s_sqrt_r)
    u = Kronecker(u_s, u_r)

    return OILMM(kernels, u, s_sqrt, noise, latent_noises)


def objective(vs):
    x_ind = vs.unbounded(x_ind_init, name="x_ind")
    return -construct_model(vs).logpdf(x_data, y_data, x_ind=x_ind)


minimise_l_bfgs_b(objective, vs, trace=True, iters=args.i)

# Print variables.
vs.print()


def cov_to_corr(k):
    std = B.sqrt(B.diag(k))
    return k / std[:, None] / std[None, :]


# Compute correlations between simulators.
u = Dense(vs["sims/u"])
s_sqrt = Diagonal(vs["sims/s_sqrt"])
k = u @ s_sqrt @ s_sqrt @ u.T
std = B.sqrt(B.diag(k))
corr_learned = cov_to_corr(k)
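
# A small self-contained check of `cov_to_corr` above, using a toy covariance
# matrix made up for illustration: dividing a covariance by the outer product
# of its per-output standard deviations gives a correlation matrix with ones
# on the diagonal.
import numpy as np

toy_cov = np.array([[4.0, 1.0],
                    [1.0, 9.0]])
toy_std = np.sqrt(np.diag(toy_cov))
toy_corr = toy_cov / toy_std[:, None] / toy_std[None, :]
assert np.allclose(np.diag(toy_corr), 1.0)
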
def fit(self, x, y, greedy=False, fix=True, **kw_args):
    """Fit the model to data.

    Further takes in keyword arguments for `Varz.minimise_l_bfgs_b`.

    Args:
        x (tensor): Inputs of training data.
        y (tensor): Outputs of training data.
        greedy (bool, optional): Greedily determine the ordering of the
            outputs. Defaults to `False`.
        fix (bool, optional): Fix the parameters of a layer after training
            it. If set to `False`, the likelihoods are accumulated and all
            parameters are optimised at every step. Defaults to `True`.
    """
    if greedy:
        raise NotImplementedError('Greedy search is not implemented yet.')

    # Store data.
    self.x = torch.tensor(B.uprank(x))
    self.y = torch.tensor(self._transform_y(B.uprank(y)))
    self.n, self.m = self.x.shape
    self.p = self.y.shape[1]

    # Perform normalisation, carefully handling missing values.
    if self.normalise_y:
        means, stds = [], []
        for i in range(self.p):
            # Filter missing observations.
            available = ~B.isnan(self.y[:, i])
            y_i = self.y[available, i]

            # Calculate mean.
            means.append(B.mean(y_i))

            # Calculate std: safely handle the zero case.
            std = B.std(y_i)
            if std > 0:
                stds.append(std)
            else:
                stds.append(B.cast(B.dtype(std), 1))

        # Stack into a vector and create normalisers.
        means, stds = B.stack(*means)[None, :], B.stack(*stds)[None, :]

        def normalise_y(y_):
            return (y_ - means) / stds

        def unnormalise_y(y_):
            return y_ * stds + means

        # Save normalisers.
        self._normalise_y = normalise_y
        self._unnormalise_y = unnormalise_y

        # Perform normalisation.
        self.y = normalise_y(self.y)

    # Precompute the results of `per_output`. This can otherwise incur a
    # significant overhead if the number of outputs is large.
    y_cached = {k: list(per_output(self.y, keep=k)) for k in [True, False]}

    # Fit layer by layer.
    # Note: `_construct_gpar` takes in the *number* of outputs.
    sys.stdout.write('Training conditionals (total: {}):'.format(self.p))
    sys.stdout.flush()
    for pi in range(self.p):
        sys.stdout.write(' {}'.format(pi + 1))
        sys.stdout.flush()

        # If we fix the parameters of previous layers, we can precompute
        # the inputs. This speeds up the optimisation massively.
        if fix:
            gpar = _construct_gpar(self, self.vs, self.m, pi + 1)
            fixed_x, fixed_x_ind = gpar.logpdf(self.x,
                                               y_cached,
                                               only_last_layer=True,
                                               outputs=list(range(pi)),
                                               return_inputs=True)

        def objective(vs):
            gpar = _construct_gpar(self, vs, self.m, pi + 1)
            # If the parameters of the previous layers are fixed, use the
            # precomputed inputs.
            if fix:
                return -gpar.logpdf(fixed_x,
                                    y_cached,
                                    only_last_layer=True,
                                    outputs=[pi],
                                    x_ind=fixed_x_ind)
            else:
                return -gpar.logpdf(self.x, y_cached, only_last_layer=False)

        # Determine names to optimise.
        if fix:
            names = ['{}/*'.format(pi)]
        else:
            names = ['{}/*'.format(i) for i in range(pi + 1)]

        # Perform the optimisation.
        minimise_l_bfgs_b(objective, self.vs, names=names, **kw_args)

    # Print a newline to end the progress bar.
    sys.stdout.write('\n')

    # Store that the model is fit.
    self.is_fit = True
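
# A minimal NumPy sketch (toy data, made up for illustration) of the
# normalisation strategy in `fit` above: per-output statistics are computed
# over the non-missing entries only, and a zero standard deviation (e.g. a
# constant column) is replaced by one so the division stays well defined.
import numpy as np

toy_y = np.array([[1.0, 5.0],
                  [np.nan, 5.0],
                  [3.0, 5.0]])
means, stds = [], []
for i in range(toy_y.shape[1]):
    y_i = toy_y[~np.isnan(toy_y[:, i]), i]
    means.append(np.mean(y_i))
    std = np.std(y_i)
    stds.append(std if std > 0 else 1.0)
toy_y_norm = (toy_y - np.array(means)) / np.array(stds)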