def test_kernel_vector(params, ihyps):
    name, cutoffs, hyps_mask_list, _ = params
    np.random.seed(10)

    test_point = get_tstp()

    size1 = len(flare.gp_algebra._global_training_data[name])
    size2 = len(flare.gp_algebra._global_training_structures[name])

    hyps_mask = hyps_mask_list[ihyps]
    hyps = hyps_mask["hyps"]
    kernel = str_to_kernel_set(hyps_mask["kernels"], "mc", hyps_mask)

    # Test the parallel implementation for multihyps against the serial one.
    vec = get_kernel_vector(
        name, kernel[0], kernel[3], test_point, 1, hyps, cutoffs, hyps_mask
    )
    vec_par = get_kernel_vector(
        name,
        kernel[0],
        kernel[3],
        test_point,
        1,
        hyps,
        cutoffs,
        hyps_mask,
        n_cpus=2,
        n_sample=100,
    )

    assert np.isclose(vec, vec_par, rtol=1e-4).all(), "parallel implementation is wrong"

    # The kernel vector holds three force components per training environment
    # plus one energy entry per training structure.
    assert vec.shape[0] == size1 * 3 + size2
def test_get_kernel_vector(params):
    hyps, name, kernel, cutoffs, kernel_m, hyps_list, hyps_mask_list = params

    test_point = get_tstp()
    size = len(flare.gp_algebra._global_training_data[name])

    # Test the parallel implementation for multihyps against the serial one.
    vec = get_kernel_vector(
        name, kernel_m[0], test_point, 1, hyps, cutoffs, hyps_mask_list[0]
    )
    vec_par = get_kernel_vector(
        name,
        kernel_m[0],
        test_point,
        1,
        hyps,
        cutoffs,
        hyps_mask_list[0],
        n_cpus=2,
        n_sample=100,
    )

    assert np.equal(vec, vec_par).all(), "parallel implementation is wrong"
    assert vec.shape[0] == size * 3, f"{vec} {size}"
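# Illustration, not FLARE code: the contract both tests exercise is that a
# kernel vector evaluated in independent, n_sample-sized batches (which is
# what lets get_kernel_vector spread work over n_cpus processes) must
# reproduce the serial result. A minimal sketch with a toy RBF kernel and
# made-up data; every name below is hypothetical.

import numpy as np


def _toy_rbf(a, b, ell=1.0):
    # Toy squared-exponential kernel standing in for FLARE's mc kernels.
    return np.exp(-0.5 * (a - b) ** 2 / ell ** 2)


def _demo_chunked_kernel_vector(n_sample=100):
    training_data = np.linspace(-3.0, 3.0, 250)
    test_point = 0.5

    # Serial reference.
    vec = _toy_rbf(training_data, test_point)

    # Chunked evaluation: each batch is independent of the others, so the
    # batches can be computed in any order or on any worker.
    parts = [
        _toy_rbf(training_data[i : i + n_sample], test_point)
        for i in range(0, len(training_data), n_sample)
    ]
    vec_par = np.concatenate(parts)

    assert np.allclose(vec, vec_par)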
def predict(self, x_t: AtomicEnvironment, d: int) -> tuple[float, float]:
    """
    Predict a force component of the central atom of a local environment.

    Args:
        x_t (AtomicEnvironment): Input local environment.
        d (int): Force component to be predicted (1 is x, 2 is y, and
            3 is z).

    Return:
        (float, float): Mean and epistemic variance of the prediction.
    """

    if d not in [1, 2, 3]:
        raise ValueError("d should be 1, 2, or 3")

    # Kernel vector allows for evaluation of atomic environments.
    if self.parallel and not self.per_atom_par:
        n_cpus = self.n_cpus
    else:
        n_cpus = 1

    self.sync_data()

    k_v = get_kernel_vector(
        self.name,
        self.kernel,
        self.energy_force_kernel,
        x_t,
        d,
        self.hyps,
        cutoffs=self.cutoffs,
        hyps_mask=self.hyps_mask,
        n_cpus=n_cpus,
        n_sample=self.n_sample,
    )

    # Guarantee that alpha is up to date with the training set.
    self.check_L_alpha()

    # Predictive mean.
    pred_mean = np.matmul(k_v, self.alpha)

    # Predictive variance without Cholesky (possibly faster). Pass args to
    # the kernel based on whether multiple hyperparameter groups are in use.
    args = from_mask_to_args(self.hyps, self.cutoffs, self.hyps_mask)
    self_kern = self.kernel(x_t, x_t, d, d, *args)
    pred_var = self_kern - np.matmul(np.matmul(k_v, self.ky_mat_inv), k_v)

    return pred_mean, pred_var
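# For reference, predict() above implements the standard GP predictive
# equations: mean k_*^T alpha with alpha = (K + sigma_n^2 I)^{-1} y, and
# variance k(x_*, x_*) - k_*^T (K + sigma_n^2 I)^{-1} k_*. A self-contained
# sketch with a hypothetical 1-D kernel and toy data; these names are
# illustrative, not FLARE's.

import numpy as np


def _toy_kernel(a, b, sig=1.0, ell=1.0):
    return sig ** 2 * np.exp(-0.5 * (a - b) ** 2 / ell ** 2)


def _demo_gp_predict():
    rng = np.random.default_rng(0)
    x_train = rng.uniform(-3.0, 3.0, 8)
    y_train = np.sin(x_train)
    noise = 1e-2

    # Analogues of self.ky_mat_inv and self.alpha above.
    ky_mat = _toy_kernel(x_train[:, None], x_train[None, :]) + noise * np.eye(8)
    ky_mat_inv = np.linalg.inv(ky_mat)
    alpha = ky_mat_inv @ y_train

    x_t = 0.5
    k_v = _toy_kernel(x_train, x_t)

    pred_mean = k_v @ alpha
    pred_var = _toy_kernel(x_t, x_t) - k_v @ ky_mat_inv @ k_v
    return pred_mean, pred_var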
def test_prediction():
    """
    Test that the prediction function works. The RBCM in the 1-expert case
    *does not* reduce to a GP's predictions, because each expert's mean and
    variance are weighted by the expert's performance on the entire dataset
    in a way that does not yield a weight of 1 in the absence of other
    experts. Hence, perform the relevant transformations on a GP's
    prediction and check it against the RBCM's.
    :return:
    """
    prior_var = 0.1
    rbcm = RobustBayesianCommitteeMachine(
        ndata_per_expert=100,
        prior_variance=prior_var,
    )
    gp = GaussianProcess()

    envs = methanol_envs[:10]
    for env in envs:
        rbcm.add_one_env(env, env.force)
        gp.add_one_env(env, env.force, train=False)

    struc = methanol_frames[-1]
    gp.update_db(struc, forces=struc.forces)
    rbcm.update_db(struc, forces=struc.forces)

    test_env = methanol_envs[-1]

    for d in [1, 2, 3]:
        assert np.array_equal(gp.hyps, rbcm.hyps)
        rbcm_pred = rbcm.predict(test_env, d)
        gp_pred = gp.predict(test_env, d)

        gp_kv = get_kernel_vector(
            gp.name,
            gp.kernel,
            gp.energy_force_kernel,
            test_env,
            d,
            gp.hyps,
            cutoffs=gp.cutoffs,
            hyps_mask=gp.hyps_mask,
            n_cpus=1,
            n_sample=gp.n_sample,
        )
        gp_mean = np.matmul(gp_kv, gp.alpha)
        assert gp_mean == gp_pred[0]

        gp_self_kern = gp.kernel(
            env1=test_env,
            env2=test_env,
            d1=d,
            d2=d,
            hyps=gp.hyps,
            cutoffs=np.array((7, 3.5)),
        )
        gp_var_i = gp_self_kern - np.matmul(
            np.matmul(gp_kv.T, gp.ky_mat_inv), gp_kv
        )

        # Transform the GP prediction the way the RBCM weights its one expert.
        gp_beta = 0.5 * (np.log(prior_var) - np.log(gp_var_i))
        mean = gp_mean * gp_beta / gp_var_i
        var = gp_beta / gp_var_i + (1 - gp_beta) / prior_var
        pred_var = 1.0 / var
        pred_mean = pred_var * mean

        assert pred_mean == rbcm_pred[0]
        assert pred_var == rbcm_pred[1]
def predict(self, x_t: AtomicEnvironment, d: int) -> tuple[float, float]:
    """
    Predict a force component of the central atom of a local environment.
    Performs the prediction with each expert and combines the results.

    The expert weights beta_i are computed on a per-expert basis following
    Cao and Fleet 2014 (https://arxiv.org/abs/1410.7827), which Liu et al.
    (https://arxiv.org/pdf/1806.00720.pdf) describe as "the difference in
    differential entropy between the prior and the posterior".

    Args:
        x_t (AtomicEnvironment): Input local environment.
        d (int): Force component to predict (1=x, 2=y, 3=z).

    Return:
        (float, float): Mean and epistemic variance of the prediction.
    """
    assert d in [1, 2, 3], "d must be 1, 2, or 3."

    # Kernel vector allows for evaluation of atomic environments.
    if self.parallel and not self.per_atom_par:
        n_cpus = self.n_cpus
    else:
        n_cpus = 1

    self.sync_data()

    # One kernel vector per expert.
    k_v = []
    for i in range(self.n_experts):
        k_v.append(
            get_kernel_vector(
                f"{self.name}_{i}",
                self.kernel,
                self.energy_force_kernel,
                x_t,
                d,
                self.hyps,
                cutoffs=self.cutoffs,
                hyps_mask=self.hyps_mask,
                n_cpus=n_cpus,
                n_sample=self.n_sample,
            )
        )

    # Guarantee that alpha is up to date with the training set.
    self.check_L_alpha()

    mean = 0.0
    var = 0.0
    beta = 0.0  # Running sum of the expert weights.

    # Pass args to the kernel based on whether multiple hyperparameter
    # groups are in use.
    args = from_mask_to_args(self.hyps, self.cutoffs, self.hyps_mask)

    # The self-kernel term is the same for every expert, so compute it once.
    self_kern = self.kernel(x_t, x_t, d, d, *args)

    for i in range(self.n_experts):
        # Predictive mean of expert i.
        mean_i = np.matmul(self.alpha[i], k_v[i])

        # Predictive variance of expert i without Cholesky (possibly faster).
        var_i = self_kern - np.matmul(
            np.matmul(k_v[i].T, self.ky_mat_inv[i]), k_v[i]
        )

        # This expert's weight.
        beta_i = 0.5 * (self.log_prior_var - np.log(var_i))

        mean += mean_i * beta_i / var_i
        var += beta_i / var_i
        beta += beta_i

    # The prior supplies the remaining (1 - sum of betas) share of the
    # combined precision.
    var += (1 - beta) / self.prior_variance
    pred_var = 1.0 / var
    pred_mean = pred_var * mean

    return pred_mean, pred_var
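# Illustration, not FLARE's API: the aggregation at the end of predict() is
# the rBCM combination rule of Cao and Fleet 2014. With per-expert means m_i,
# variances v_i, and weights beta_i = 0.5 * (log v_prior - log v_i), the
# combined precision is sum_i(beta_i / v_i) + (1 - sum_i beta_i) / v_prior,
# and the combined mean is the precision-weighted sum of the expert means.
# A hypothetical standalone helper:

import numpy as np


def _rbcm_combine(means, variances, prior_var):
    means = np.asarray(means, dtype=float)
    variances = np.asarray(variances, dtype=float)

    # Per-expert weight: difference in differential entropy between the
    # prior and this expert's posterior.
    betas = 0.5 * (np.log(prior_var) - np.log(variances))

    precision = np.sum(betas / variances) + (1.0 - np.sum(betas)) / prior_var
    pred_var = 1.0 / precision
    pred_mean = pred_var * np.sum(betas * means / variances)
    return pred_mean, pred_var


# Example: _rbcm_combine([1.0, 1.1], [0.05, 0.08], prior_var=0.1); because
# the betas sum to less than 1, the zero-mean prior still shrinks the
# combined mean below the expert means.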