def update(self, G, y, data_std):
    """ Updates the realization.

    WARNING: should only be used after the covariance module has been
    updated, since it depends on it having computed the latest quantities.

    Params
    ------
    G: Tensor
        Measurement matrix.
    y: Tensor
        Data vector.
    data_std: float
        Measurement noise standard deviation, assumed to be iid centered
        Gaussian.

    """
    # Compute simulated data.
    noise = torch.normal(mean=0.0, std=data_std, size=(G.shape[0], 1))
    y_prime = G @ self.m + noise

    y = _make_column_vector(y)
    y_prime = _make_column_vector(y_prime).double()

    # Get the latest conditioning operators.
    K_dash = self.cov_module.pushforwards[-1]
    R = self.cov_module.inversion_ops[-1]

    self.m = (self.m.double()
            + K_dash.double() @ R.double() @ (y - y_prime)).float()

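# The update above conditions the realization by kriging on a residual: data
# y' = G m + eps is simulated from the current realization, and the
# realization is shifted by K' R (y - y'), where K' and R are the latest
# pushforward and inversion operators stored by the covariance module.
# A minimal usage sketch (illustrative only; it assumes the covariance module
# exposes an update(G, data_std) method, which is not shown in this excerpt):
#
#     real = UpdatableRealization(prior_sample, gp)   # see __init__ below
#     gp.covariance.update(G, data_std)               # covariance first
#     real.update(G, y, data_std)                     # then the realization
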
def __init__(self, prior, cov_module):
    """ Build an updatable mean.

    Params
    ------
    prior: (n_cells, 1) Tensor
        Vector defining the prior mean at the model points.
        We require the number of points to be the same as in the updatable
        covariance module. Note that we want a column vector.
    cov_module: UpdatableCovariance

    """
    prior = _make_column_vector(prior)
    self.prior = prior
    self.m = prior  # Current value of the conditional mean.
    self.n_cells = prior.shape[0]
    self.cov_module = cov_module

    if not (self.n_cells == cov_module.n_cells):
        raise ValueError(
            "Model size for mean: {} does not agree with "
            "model size for covariance {}.".format(
                self.n_cells, cov_module.n_cells))

def __init__(self, prior_realization, gp_module):
    """ Build an updatable realization.

    Params
    ------
    prior_realization: (n_cells, 1) Tensor
        Vector defining the prior realization at the model points.
        We require the number of points to be the same as in the updatable
        covariance module. Note that we want a column vector.
    gp_module: UpdatableGP

    """
    prior_realization = _make_column_vector(prior_realization)
    self.prior_realization = prior_realization
    self.conditional_mean = UpdatableMean(prior_realization,
            gp_module.covariance)
    self.n_cells = prior_realization.shape[0]
    self.gp_module = gp_module

    if not (self.n_cells == self.gp_module.n_cells):
        raise ValueError(
            "Model size for mean: {} does not agree with "
            "model size for covariance {}.".format(
                self.n_cells, self.gp_module.n_cells))

def update(self, y, G):
    """ Updates the mean.

    WARNING: should only be used after the covariance module has been
    updated, since it depends on it having computed the latest quantities.

    Params
    ------
    y: Tensor
        Data vector.
    G: Tensor
        Measurement matrix.

    """
    y = _make_column_vector(y)

    # Get the latest conditioning operators.
    K_dash = self.cov_module.pushforwards[-1]
    R = self.cov_module.inversion_ops[-1]

    self.m = (
            self.m.double()
            + self.cov_module.sigma0**2
            * K_dash.double() @ R.double() @ (y - G @ self.m).double()).float()

    # ----
    # TEMP
    # ----
    torch.save(K_dash, "update_pushfwd.pt")
    torch.save(R, "update_inversion_op.pt")

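# For the mean, the kriging update reads m <- m + sigma0^2 * K' R (y - G m),
# i.e. the same operators as for the realization, applied to the actual
# residual instead of a simulated one. A minimal usage sketch (illustrative
# only; it assumes the covariance module exposes an update(G, data_std)
# method, which is not shown in this excerpt):
#
#     mean = UpdatableMean(prior, cov)   # prior: (n_cells, 1) column vector
#     cov.update(G, data_std)            # covariance must be updated first
#     mean.update(y, G)                  # then the mean can be updated
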
def train(self, lambda0s, G, y, data_std, out_path, device=None,
        n_epochs=5000, lr=0.1, n_chunks=200, n_flush=50):
    """ For each prior lengthscale lambda0 in the given list, optimize the
    two remaining hyperparameters (sigma0, m0) via MLE.
    The user can choose between CPU and GPU.

    Parameters
    ----------
    lambda0s: Iterable
        List of prior lengthscales for which to optimize the two other
        hyperparams.
    G: Tensor
        Measurement matrix.
    y: (n_data, 1) Tensor
        Observed data. Has to be a column vector.
    data_std: float
        Data noise standard deviation.
    out_path: string
        Path for training results.
    device: torch.device
        Device on which to perform the training. Should be the same as the
        one the inputs are located on. If None, defaults to gpu0.
    n_epochs: int
        Number of training epochs.
    lr: float
        Learning rate.

    Returns
    -------
    df: pandas.DataFrame
        One row per lambda0, with columns
        (lambda0, sigma0, m0, nll, train_RMSE).

    """
    start = timer()
    if device is None:
        device = self.device
    y = _make_column_vector(y)

    # Store results in a Pandas DataFrame.
    df = pd.DataFrame(columns=['lambda0', 'sigma0', 'm0', 'nll',
            'train_RMSE'])
    for lambda0 in lambda0s:
        (sigma0, m0, nll, train_RMSE) = self.train_fixed_lambda(
                lambda0, G, y, data_std,
                device=device,
                n_epochs=n_epochs, lr=lr,
                n_chunks=n_chunks, n_flush=n_flush)
        df = df.append({'lambda0': lambda0, 'sigma0': sigma0, 'm0': m0,
                'nll': nll, 'train_RMSE': train_RMSE}, ignore_index=True)

        # Save after each lambda0.
        df.to_pickle(out_path)

    end = timer()
    print("Training done in {} minutes.".format((end - start) / 60))
    return df

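# Usage sketch for the hyperparameter sweep (illustrative only; G, y and
# data_std are assumed to be already-built inputs and `gp` an instance of
# this class, the grid of lengthscales is hypothetical):
#
#     import numpy as np
#     lambda0s = np.linspace(50.0, 500.0, 10)
#     df = gp.train(lambda0s, G, y, data_std,
#             out_path="./train_results.pkl", n_epochs=2000, lr=0.05)
#     best = df.loc[df['nll'].idxmin()]   # lambda0 with the best likelihood
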
def condition_fantasy_data(self, prior, stacked_G, fantasy_ys,
        splitted_inds):
    """ Compute the posterior mean that would result from observing data
    other than the data that was assimilated (over the full history).

    The main use of this method is to compute conditional realizations
    using residual kriging.

    Note that this method uses the pre-computed intermediate inversion
    operators, hence each datapoint should correspond to one of the
    observation operators used in the updates, and the noise level should
    be the same (since it enters the computation of the inversion
    operators).

    Parameters
    ----------
    prior: (n_cells, 1) Tensor
        Vector defining the prior mean at the model points.
    stacked_G: (d, n_cells) Tensor
        Observation operators at each step (stacked). Assumed to correspond
        to the ones used to update the GP.
        WARNING: will only work if the GP has been updated with one
        observation at a time.
    fantasy_ys: (len(self.pushforwards)) Tensor
        Vector of observed data values. The n-th element should correspond
        to the n-th updating operation which was performed, hence to the
        n-th observation operator.
        Note this is a bit awkward, since currently the UpdatableCovariance
        module does not inform the user about which observation operator
        was used at stage n. Moreover, this procedure currently only allows
        for 1-datapoint observations.
    splitted_inds: list of index lists
        One entry per conditioning step, giving the rows of stacked_G and
        the entries of fantasy_ys that were assimilated at that step.

    Returns
    -------
    conditional_mean: (self.n_cells, 1) Tensor
        Conditional mean, conditional on the provided data.

    """
    conditional_mean = prior.double()
    for i, inds in enumerate(splitted_inds):
        y = _make_column_vector(fantasy_ys[inds]).double()
        K_dash = self.pushforwards[i].double()
        R = self.inversion_ops[i]
        conditional_mean = (
                conditional_mean.double()
                + K_dash @ R
                @ (y - stacked_G[inds, :].double() @ conditional_mean).double())
    return conditional_mean.float()

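# Usage sketch for residual kriging with fantasy data (illustrative only;
# `cov` denotes the UpdatableCovariance instance). Since the docstring above
# requires one datapoint per update, splitted_inds is simply one singleton
# index list per conditioning step:
#
#     n_steps = len(cov.pushforwards)
#     splitted_inds = [[i] for i in range(n_steps)]
#     fantasy_mean = cov.condition_fantasy_data(
#             prior, stacked_G, fantasy_ys, splitted_inds)
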
def concentrate_m0(self, G, y, device=None):
    """ Compute m0 (prior mean parameter) by MLE via concentration.

    Note that the inversion operator should have been updated first.

    """
    if device is None:
        # Check if a GPU is available, otherwise fall back to CPU.
        device = self.device
    y = _make_column_vector(y).double().to(device)

    # Prior mean (vector) on the data side, stripped of the m0 factor.
    mu0_d_stripped = torch.mm(G.double().to(device),
            torch.ones((self.n_model, 1), dtype=torch.float64,
                    device=device))

    # Compute R^(-1) * G * 1_m, with 1_m the all-ones model vector.
    tmp = self.inversion_operator @ mu0_d_stripped

    conc_m0 = (y.t() @ tmp) / (mu0_d_stripped.t() @ tmp)
    return conc_m0.float()

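# In formula form, with 1 the all-ones model vector and R the data-side
# covariance whose inverse is self.inversion_operator (as the comment in the
# code above indicates), the concentrated estimate computed here is
#
#     m0* = (y^T R^{-1} G 1) / ((G 1)^T R^{-1} G 1),
#
# i.e. the value of m0 maximizing the likelihood for fixed sigma0 and lambda0.
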
def neg_log_likelihood(self, y, G, m0, device=None):
    """ Computes the negative log-likelihood of the current state of the
    model.
    Note that this function should be called AFTER having run a
    conditioning, since it depends on the inversion operator computed
    there.

    Params
    ------
    y: (n_data, 1) Tensor
        Observed data. Has to be a column vector.
    G: Tensor
        Measurement matrix.
    m0: float
        Prior mean parameter.
    device: torch.device
        Device on which to perform the training. Should be the same as the
        one the inputs are located on. If None, defaults to gpu0.

    Returns
    -------
    float

    """
    if device is None:
        device = self.device
    y = _make_column_vector(y)

    # WARNING: the determinant is not linear, so constants cannot simply be
    # factored out of it (they come out raised to the power of the
    # dimension).
    log_det = torch.logdet(self.R).double()

    mu0_d_stripped = torch.mm(G.double().to(device),
            torch.ones((self.n_model, 1), dtype=torch.float64,
                    device=device))
    mu0_d = m0 * mu0_d_stripped
    prior_misfit = y.double() - mu0_d.double()

    nll = log_det + torch.mm(prior_misfit.t(), self.weights)
    return nll

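# Assuming, as the comments elsewhere suggest, that self.inversion_operator
# is the inverse of the data-side covariance self.R, the quantity returned
# above is
#
#     log det(R) + (y - m0 * G 1)^T R^{-1} (y - m0 * G 1),
#
# i.e. twice the negative log-likelihood up to an additive constant. The term
# self.weights = R^{-1} (y - m0 * G 1) is cached by condition_data (see
# below), which is why a conditioning step must be run before calling this
# method.
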
def train_fixed_lambda(self, lambda0, G, y, data_std, device=None,
        n_epochs=5000, lr=0.1, n_chunks=200, n_flush=50):
    """ Given lambda0, optimize the two remaining hyperparameters
    (sigma0, m0) via MLE.
    The user can choose between CPU and GPU.

    Parameters
    ----------
    lambda0: float
        Prior lengthscale for which to optimize the two other hyperparams.
    G: Tensor
        Measurement matrix.
    y: (n_data, 1) Tensor
        Observed data. Has to be a column vector.
    data_std: float
        Data noise standard deviation.
    device: torch.device
        Device on which to perform the training. Should be the same as the
        one the inputs are located on. If None, defaults to gpu0.
    n_epochs: int
        Number of training epochs.
    lr: float
        Learning rate.

    Returns
    -------
    (sigma0, m0, nll, train_RMSE)

    """
    if device is None:
        device = self.device
    y = _make_column_vector(y).to(device)

    # Compute the pushforward once and for all, since it only depends on
    # lambda0 and G.
    self.lambda0 = lambda0
    self.compute_pushfwd(G)

    optimizer = torch.optim.Adam(self.parameters(), lr=lr)

    for epoch in range(n_epochs + 1):
        # Forward pass: compute the data-side posterior mean and the
        # negative log-likelihood.
        m_post_d, nll, data_std = self.condition_data(
                G, y, data_std, concentrate=True,
                is_precomp_pushfwd=True, device=device)

        # Zero gradients, perform a backward pass,
        # and update the weights.
        optimizer.zero_grad()
        nll.backward(retain_graph=True)
        optimizer.step()

        # Periodically print information.
        if epoch % 100 == 0:
            # Compute train error.
            train_RMSE = torch.sqrt(torch.mean((y - m_post_d)**2))

            self.logger.info("Epoch: {}/{}".format(epoch, n_epochs))
            self.logger.info("lambda0: {}".format(lambda0))
            self.logger.info("sigma0: {}".format(self.sigma0.item()))
            self.logger.info("m0: {}".format(self.m0.item()))
            self.logger.info("Negative log-likelihood: {}".format(nll.item()))
            self.logger.info("RMSE train error: {}".format(train_RMSE.item()))

    return (self.sigma0.item(), self.m0.item(), nll.item(),
            train_RMSE.item())

def condition_model(self, G, y, data_std, concentrate=False,
        is_precomp_pushfwd=False, device=None, hypothetical=False):
    """ Given a set of measurements, condition the model, i.e. compute the
    conditional law of the model vector Z itself, not only of the data
    vector G Z.

    Parameters
    ----------
    G: Tensor
        Measurement matrix.
    y: (n_data, 1) Tensor
        Observed data. Has to be a column vector.
    data_std: float
        Data noise standard deviation.
    concentrate: bool
        If True, then m0 is computed by MLE via concentration of the
        log-likelihood instead of using the current value of the
        hyperparameter.
    is_precomp_pushfwd: bool
        Set to True if the covariance pushforward has already been computed
        by a previous operation. Can be used to speed up calculations.
    device: torch.device
        Device on which to perform the training. Should be the same as the
        one the inputs are located on. If None, defaults to gpu0.
    hypothetical: bool, default=False
        If set to True, then the internals of the GP (pushfwd,
        inversion_op) are not updated. Use when considering hypothetical
        data.

    Returns
    -------
    mu_post_m
        Posterior mean model vector.
    mu_post_d
        Posterior mean data vector.

    """
    if device is None:
        device = self.device
    y = _make_column_vector(y).to(device)

    # Conditioning the model is just conditioning on the data and then
    # computing the posterior mean and (co-)variance on the model side.
    m_post_d, nll, data_std = self.condition_data(
            G, y, data_std, concentrate=concentrate,
            is_precomp_pushfwd=is_precomp_pushfwd)

    # Posterior model mean.
    # Can re-use the m0 and weights computed by condition_data.
    if concentrate:
        m0 = self.concentrate_m0(G, y)
    else:
        m0 = self.m0

    m_post_m = (
            m0 * torch.ones((self.n_model, 1), dtype=torch.float64,
                    device=device)
            + (self.sigma0.double()**2 * self.pushfwd.double() @ self.weights))

    # Save in case.
    self.m_post_m = m_post_m.float()
    return self.m_post_m, m_post_d.float()

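# Usage sketch (illustrative only; `gp` is an instance of this class and G,
# y, data_std already-built inputs):
#
#     m_post_m, m_post_d = gp.condition_model(G, y, data_std,
#             concentrate=True)   # also re-estimates m0 by concentration
#     # m_post_m: posterior mean on the model grid, shape (n_model, 1)
#     # m_post_d: posterior mean of the data vector G Z, shape (n_data, 1)
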
def condition_data(self, G, y, data_std, concentrate=False,
        is_precomp_pushfwd=False, device=None):
    """ Given a set of measurements, condition the model on the data side,
    i.e. only compute the conditional law of the data vector G Z, not of Z
    itself.

    Parameters
    ----------
    G: Tensor
        Measurement matrix.
    y: (n_data, 1) Tensor
        Observed data. Has to be a column vector.
    data_std: float
        Data noise standard deviation.
    concentrate: bool
        If True, then m0 is computed by MLE via concentration of the
        log-likelihood instead of using the current value of the
        hyperparameter.
    is_precomp_pushfwd: bool
        Set to True if the covariance pushforward has already been computed
        by a previous operation. Can be used to speed up calculations.
    device: torch.device
        Device on which to perform the training. Should be the same as the
        one the inputs are located on. If None, defaults to gpu0.

    Returns
    -------
    mu_post_d: Tensor
        Posterior mean data vector.
    nll: Tensor
        Negative log-likelihood.
    data_std: float
        When the noise has to be increased to make the matrices invertible,
        this gives the new value of the noise standard deviation.

    """
    if device is None:
        device = self.device
    if not is_precomp_pushfwd:
        self.compute_pushfwd(G)

    y = _make_column_vector(y).to(device)

    # Get the inversion operator.
    self.inversion_operator, data_std = self.get_inversion_op(
            self.K_d, data_std)

    if concentrate:
        # Determine m0 (on the model side) from sigma0 by concentration of
        # the log-likelihood.
        self.m0 = self.concentrate_m0(G, y)

    # Prior mean (vector) on the data side.
    mu0_d_stripped = (G.to(device) @ torch.ones((self.n_model, 1),
            dtype=torch.float32, device=device))
    mu0_d = self.m0 * mu0_d_stripped
    prior_misfit = y.double() - mu0_d.double()

    self.weights = self.inversion_operator @ prior_misfit
    m_post_d = mu0_d + torch.mm(self.sigma0**2 * self.K_d, self.weights)

    nll = self.neg_log_likelihood(y, G, self.m0)

    return m_post_d.float(), nll.float(), data_std

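# In formula form, the data-side posterior mean computed above is
#
#     m_post_d = m0 * G 1 + sigma0^2 * K_d R^{-1} (y - m0 * G 1),
#
# with K_d the data-side covariance pushforward and R^{-1} the inversion
# operator returned by get_inversion_op (which may also inflate data_std when
# needed for invertibility, hence the third return value).
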