def test_set_L_alpha(two_body_gp, params):
    """Smoke-test ``set_L_alpha`` on a GP built with a 2+3-body kernel.

    A GP is constructed from the ``en.two_plus_three_*`` kernels, trained on
    one random two-atom structure, and ``set_L_alpha`` is invoked. There are
    no assertions: the test passes when nothing raises.

    Neither ``two_body_gp`` nor ``params`` is read in the body (presumably
    both are pytest fixtures requested by name -- confirm against conftest).
    """
    # Random two-atom structure in a unit cubic cell.
    structure, forces = get_random_structure(np.eye(3), [2, 1], 2)

    # Everything up to the optimiser name is passed positionally, in this
    # order: kernel, kernel gradient, hyperparameters, cutoffs,
    # hyperparameter labels, energy/force kernel, energy kernel, optimiser.
    gp_model = GaussianProcess(
        en.two_plus_three_body,
        en.two_plus_three_body_grad,
        np.array([
            2.23751151e-01,
            8.19990316e-01,
            1.28421842e-04,
            1.07467158e+00,
            5.50677932e-02,
        ]),
        np.array([5.4, 5.4]),
        ['sig2', 'ls2', 'sig3', 'ls3', 'noise'],
        en.two_plus_three_force_en,
        en.two_plus_three_en,
        'BFGS',
        par=True,
        no_cpus=2,
    )

    gp_model.update_db(structure, forces)
    gp_model.set_L_alpha()
def two_body_gp() -> GaussianProcess:
    """Yield a GP trained on a single random five-atom structure.

    Note that, despite the function name, the kernel handed to the GP is
    ``en.three_body`` (with ``en.three_body_grad``).

    This is a generator: the GP is yielded once, and the statements after
    the ``yield`` run when the generator is resumed (presumably as pytest
    fixture teardown -- the decorator is not visible here).
    """
    print("\nSetting up...\n")

    pair_cutoffs = np.array([0.8, 0.8])

    # Random training structure: unit cubic cell, species codes 2 and 1.
    structure, forces = get_random_structure(np.eye(3), [2, 1], 5)

    gp_model = GaussianProcess(
        kernel=en.three_body,
        kernel_grad=en.three_body_grad,
        hyps=np.array([1, 1, 1]),
        hyp_labels=['Length', 'Signal Var.', 'Noise Var.'],
        par=True,
        no_cpus=2,
        cutoffs=pair_cutoffs,
    )
    gp_model.update_db(structure, forces)

    yield gp_model

    # Reached only when the generator is resumed after the yield; it is not
    # reached if the set-up above raised.
    print("\n\nTearing down\n")
    del gp_model
def two_plus_three_gp() -> GaussianProcess:
    """Yield a GP built with the ``"2+3_mc"`` kernel name.

    The GP is trained on one random three-atom structure (forces plus a
    fixed energy label of 3.14) before being yielded. This is a generator;
    the ``del`` after the ``yield`` runs when the generator is resumed
    (presumably pytest fixture teardown -- the decorator is not visible
    here).
    """
    # The original assigned ``cutoffs`` twice; only this second value ever
    # reached the constructor, so the dead first assignment was dropped.
    cutoffs = np.ones(2) * 0.8
    hyps = np.array([1.0, 1.0, 1.0, 1.0, 1.0])

    gp_model = GaussianProcess(
        kernel_name="2+3_mc",
        hyps=hyps,
        cutoffs=cutoffs,
        multihyps=False,
        parallel=False,
        n_cpus=1,
    )

    test_structure, forces = get_random_structure(np.eye(3), [1, 2], 3)
    energy = 3.14
    gp_model.update_db(test_structure, forces, energy=energy)

    yield gp_model
    del gp_model
def three_body_gp() -> GaussianProcess:
    """Yield a multi-hyperparameter GP using the ``en.three_body_mc`` kernel.

    Hyperparameters and the hyperparameter mask come from
    ``generate_hm(0, 1)``. The GP is trained on one random five-atom
    structure and then yielded; the statements after the ``yield`` run when
    the generator is resumed (presumably pytest fixture teardown -- the
    decorator is not visible here).
    """
    print("\nSetting up...\n")

    pair_cutoffs = np.array([0.8, 0.8])

    # Arguments are (nbond, ntriplet): no bond groups, one triplet group.
    hyps, hyps_mask = generate_hm(0, 1)

    # Random training structure: unit cubic cell, species codes 2 and 1.
    structure, forces = get_random_structure(np.eye(3), [2, 1], 5)

    gp_model = GaussianProcess(
        kernel=en.three_body_mc,
        kernel_grad=en.three_body_mc_grad,
        hyps=hyps,
        hyp_labels=hyps_mask['hyps_label'],
        cutoffs=pair_cutoffs,
        multihyps=True,
        hyps_mask=hyps_mask,
    )
    gp_model.update_db(structure, forces)

    yield gp_model

    # Reached only when the generator is resumed after the yield; it is not
    # reached if the set-up above raised.
    print("\n\nTearing down\n")
    del gp_model
def test_training_statistics():
    """
    Check ``training_statistics`` before and after adding one structure.

    An empty GP must report zero environments and no species; after a
    ten-atom structure (forces and an energy label) is added, it must
    report ten environments and one entry per distinct coded species.
    :return:
    """
    structure, forces = get_random_structure(np.eye(3), ["H", "Be"], 10)

    gp = GaussianProcess(kernel_name="2", cutoffs=[10])

    # Nothing has been added yet.
    stats = gp.training_statistics
    assert stats["N"] == 0
    assert len(stats["species"]) == 0
    assert len(stats["envs_by_species"]) == 0

    gp.update_db(structure, forces, energy=3.14)

    # One environment per atom, grouped by distinct species code.
    stats = gp.training_statistics
    assert stats["N"] == 10
    n_distinct = len(set(structure.coded_species))
    assert len(stats["species"]) == n_distinct
    assert len(stats["envs_by_species"]) == n_distinct
def test_training_statistics():
    """
    Check ``training_statistics`` before and after adding one structure.

    An empty GP must report zero environments and no species; after a
    ten-atom structure is added with forces only, it must report ten
    environments and one entry per distinct coded species.
    :return:
    """
    structure, forces = get_random_structure(np.eye(3), ['H', 'Be'], 10)

    gp = GaussianProcess(kernel_name='2', cutoffs=[10])

    # Nothing has been added yet.
    stats = gp.training_statistics
    assert stats['N'] == 0
    assert len(stats['species']) == 0
    assert len(stats['envs_by_species']) == 0

    gp.update_db(structure, forces)

    # One environment per atom, grouped by distinct species code.
    stats = gp.training_statistics
    assert stats['N'] == 10
    n_distinct = len(set(structure.coded_species))
    assert len(stats['species']) == n_distinct
    assert len(stats['envs_by_species']) == n_distinct
def get_gp(
    bodies,
    kernel_type="mc",
    multihyps=True,
    cellabc=(1, 1, 1.5),
    force_only=False,
    noa=5,
) -> GaussianProcess:
    """Build a GP trained on one random structure.

    :param bodies: string naming the kernel terms; a two-body term is
        enabled when it contains ``"2"`` or ``"two"``, a three-body term
        when it contains ``"3"`` or ``"three"``.
    :param kernel_type: forwarded to the GP as ``component``.
    :param multihyps: forwarded to ``generate_hm``.
    :param cellabc: the three diagonal entries of the cell matrix. The
        default is a tuple rather than a list so that no mutable object is
        shared between calls; lists are still accepted.
    :param force_only: when true the structure is added with forces only,
        otherwise an energy label of 3.14 is added as well.
    :param noa: number of atoms in the random structure.
    :return: the GP, after ``update_db`` and ``check_L_alpha`` have run.
    """
    print("\nSetting up...\n")

    cell = np.diag(cellabc)
    unique_species = [1, 2]

    # Each group count is 0 or 1, depending on what ``bodies`` mentions.
    ntwobody = 1 if ("2" in bodies or "two" in bodies) else 0
    nthreebody = 1 if ("3" in bodies or "three" in bodies) else 0

    hyps, hm, _ = generate_hm(
        ntwobody, nthreebody, nmanybody=0, multihyps=multihyps
    )
    cutoffs = hm["cutoffs"]
    kernels = hm["kernels"]
    hl = hm["hyp_labels"]

    # Random training structure.
    test_structure, forces = get_random_structure(cell, unique_species, noa)
    energy = 3.14

    gaussian = GaussianProcess(
        kernels=kernels,
        component=kernel_type,
        hyps=hyps,
        hyp_labels=hl,
        cutoffs=cutoffs,
        hyps_mask=hm,
        parallel=False,
        n_cpus=1,
    )

    if force_only:
        gaussian.update_db(test_structure, forces)
    else:
        gaussian.update_db(test_structure, forces, energy=energy)
    gaussian.check_L_alpha()

    return gaussian
def two_plus_three_gp() -> GaussianProcess:
    """Yield a GP that combines the 'twobody' and 'threebody' kernels.

    The GP is trained on one random three-atom structure (forces plus a
    fixed energy label of 3.14) before being yielded. This is a generator;
    the ``del`` after the ``yield`` runs when the generator is resumed
    (presumably pytest fixture teardown -- the decorator is not visible
    here).
    """
    gp_model = GaussianProcess(
        kernels=['twobody', 'threebody'],
        hyps=np.array([1., 1., 1., 1., 1.]),
        cutoffs={'twobody': 0.8, 'threebody': 0.8},
        multihyps=False,
        parallel=False,
        n_cpus=1,
    )

    # Random training structure: unit cubic cell, species codes 1 and 2.
    structure, forces = get_random_structure(np.eye(3), [1, 2], 3)
    gp_model.update_db(structure, forces, energy=3.14)

    yield gp_model
    del gp_model
def test_to_from_gp():
    """
    Round-trip a GP through an RBCM and compare predictions.

    A default GP is trained on every methanol frame, converted with
    ``RobustBayesianCommitteeMachine.from_gp`` and converted back with
    ``get_full_gp``. Both GPs must give exactly equal predictions for the
    first methanol environment along components 1, 2 and 3.
    :return:
    """
    original_gp = GaussianProcess()
    for frame in methanol_frames:
        original_gp.update_db(frame, forces=frame.forces)

    committee = RobustBayesianCommitteeMachine.from_gp(original_gp)
    rebuilt_gp = committee.get_full_gp()

    env = methanol_envs[0]
    for component in (1, 2, 3):
        expected = original_gp.predict(env, component)
        actual = rebuilt_gp.predict(env, component)
        assert np.array_equal(expected, actual)
def get_gp(bodies, kernel_type='mc', multihyps=True) -> GaussianProcess:
    """Build a GP named ``f'{bodies}{kernel_type}'`` on one random structure.

    :param bodies: string naming the kernel terms; a bond group is requested
        when it contains '2' or 'two', a triplet group when it contains '3'
        or 'three'. It is also the prefix of the kernel name.
    :param kernel_type: suffix appended to ``bodies`` to form the kernel
        name.
    :param multihyps: forwarded to ``generate_hm`` and to the GP; when it is
        exactly ``False`` no hyperparameter mask is passed to the GP.
    :return: the GP, after ``update_db`` and ``check_L_alpha`` have run.
    """
    print("\nSetting up...\n")

    prefix = bodies
    pair_cutoffs = np.array([0.8, 0.8])
    cell = np.diag(np.array([1, 1, 1.5]))

    # Each group count is 0 or 1, depending on what ``bodies`` mentions.
    nbond = 1 if ('2' in bodies or 'two' in bodies) else 0
    ntriplet = 1 if ('3' in bodies or 'three' in bodies) else 0

    hyps, mask, _ = generate_hm(nbond, ntriplet, multihyps=multihyps)

    # Random five-atom training structure with species codes 2 and 1.
    structure, forces = get_random_structure(cell, [2, 1], 5)

    # The labels are always read from the generated mask, even when the
    # mask itself is then withheld from the GP.
    labels = mask['hyps_label']
    gp_mask = None if multihyps is False else mask

    gp_model = GaussianProcess(
        kernel_name=f'{prefix}{kernel_type}',
        hyps=hyps,
        hyp_labels=labels,
        cutoffs=pair_cutoffs,
        multihyps=multihyps,
        hyps_mask=gp_mask,
        parallel=False,
        n_cpus=1,
    )
    gp_model.update_db(structure, forces)
    gp_model.check_L_alpha()

    return gp_model
def test_remove_force_data():
    """
    Check that removing force data restores the earlier predictions.

    A GP is trained on one random structure and its predictions on two
    structures are recorded. A second structure is then added (first in
    full, then only atom 0); predictions must change. After the added data
    is removed again, predictions must be exactly what they were initially.
    Removing an out-of-range index must raise ``ValueError``.
    :return:
    """
    test_structure, forces = get_random_structure(
        5.0 * np.eye(3), ["H", "Be"], 5)
    test_structure_2, forces_2 = get_random_structure(
        5.0 * np.eye(3), ["H", "Be"], 5)

    gp = GaussianProcess(kernels=["twobody"], cutoffs={"twobody": 0.8})
    gp.update_db(test_structure, forces)

    with raises(ValueError):
        gp.remove_force_data(1000000)

    def _predict(struc):
        # Predict with the GP's current state without writing the results
        # back onto the structure.
        return predict_on_structure(struc, gp, write_to_structure=False)

    init_forces, init_stds = _predict(test_structure)
    init_forces_2, init_stds_2 = _predict(test_structure_2)

    # First pass adds the whole second structure, second pass only atom 0.
    for custom_range in (None, [0]):
        # Adding data must move every prediction.
        gp.update_db(test_structure_2, forces_2, custom_range=custom_range)

        new_forces, new_stds = _predict(test_structure)
        new_forces_2, new_stds_2 = _predict(test_structure_2)

        assert not np.array_equal(init_forces, new_forces)
        assert not np.array_equal(init_forces_2, new_forces_2)
        assert not np.array_equal(init_stds, new_stds)
        assert not np.array_equal(init_stds_2, new_stds_2)

        # The first structure occupies indices 0-4, so the newly added
        # entries start at index 5.
        to_remove = 5 if custom_range == [0] else [5, 6, 7, 8, 9]
        popped_strucs, popped_forces = gp.remove_force_data(to_remove)

        for i, popped in enumerate(popped_forces):
            assert np.array_equal(popped, forces_2[i])
            assert np.array_equal(popped_strucs[i].structure.positions,
                                  test_structure_2.positions)

        # With the extra data gone, predictions must match the initial
        # ones exactly.
        final_forces, final_stds = _predict(test_structure)
        final_forces_2, final_stds_2 = _predict(test_structure_2)

        assert np.array_equal(init_forces, final_forces)
        assert np.array_equal(init_stds, final_stds)
        assert np.array_equal(init_forces_2, final_forces_2)
        assert np.array_equal(init_stds_2, final_stds_2)
def test_prediction():
    """
    Check RBCM predictions against a hand-transformed GP prediction.

    With a single expert the RBCM does *not* reduce to the plain GP
    prediction: each expert's mean and variance are weighted by a factor
    that does not become 1 when no other experts are present. The test
    therefore applies that same weighting to the GP's mean and variance and
    compares the result with the RBCM output.
    :return:
    """
    prior_var = 0.1

    rbcm = RobustBayesianCommitteeMachine(
        ndata_per_expert=100,
        prior_variance=prior_var,
    )
    gp = GaussianProcess()

    # Feed both models the same ten environments ...
    for env in methanol_envs[:10]:
        rbcm.add_one_env(env, env.force)
        gp.add_one_env(env, env.force, train=False)

    # ... and the same final frame.
    last_frame = methanol_frames[-1]
    gp.update_db(last_frame, forces=last_frame.forces)
    rbcm.update_db(last_frame, forces=last_frame.forces)

    test_env = methanol_envs[-1]

    for d in (1, 2, 3):
        assert np.array_equal(gp.hyps, rbcm.hyps)

        rbcm_pred = rbcm.predict(test_env, d)
        gp_pred = gp.predict(test_env, d)

        # Rebuild the GP mean from the kernel vector and alpha.
        kernel_vec = get_kernel_vector(
            gp.name,
            gp.kernel,
            gp.energy_force_kernel,
            test_env,
            d,
            gp.hyps,
            cutoffs=gp.cutoffs,
            hyps_mask=gp.hyps_mask,
            n_cpus=1,
            n_sample=gp.n_sample,
        )
        gp_mean = np.matmul(kernel_vec, gp.alpha)
        assert gp_mean == gp_pred[0]

        # GP predictive variance: self kernel minus k^T K^-1 k.
        self_kernel = gp.kernel(
            env1=test_env,
            env2=test_env,
            d1=d,
            d2=d,
            hyps=gp.hyps,
            cutoffs=np.array((7, 3.5)),
        )
        projected = np.matmul(np.matmul(kernel_vec.T, gp.ky_mat_inv),
                              kernel_vec)
        gp_var = self_kernel - projected

        # Apply the expert weight (beta) to the mean and the precision.
        # The arithmetic order is kept as-is because the comparisons below
        # are exact.
        beta = 0.5 * (np.log(prior_var) - np.log(gp_var))
        weighted_mean = gp_mean * beta / gp_var
        precision = beta / gp_var + (1 - beta) / prior_var

        pred_var = 1.0 / precision
        pred_mean = pred_var * weighted_mean

        assert pred_mean == rbcm_pred[0]
        assert pred_var == rbcm_pred[1]
class FlareCalc(FLARE_Calculator, MLPCalc):
    """Calculator that combines ``FLARE_Calculator`` with ``MLPCalc``.

    Atomic numbers found in the initial images are mapped to consecutive
    integer codes (``species_map``); those codes are used whenever an atoms
    object is converted to a ``Structure`` for the GP model.
    """

    implemented_properties = ["energy", "forces", "stress", "stds"]

    def __init__(self, flare_params: dict, initial_images, mgp_model=None,
                 par=False, use_mapping=False, **kwargs):
        """Store the images, build the species map, then init both bases.

        ``flare_params`` is handed to ``MLPCalc.__init__`` as ``mlp_params``;
        the remaining arguments go to the next ``__init__`` in the MRO, with
        ``None`` as its first positional argument.
        """
        self.initial_images = initial_images
        self.init_species_map()
        MLPCalc.__init__(self, mlp_params=flare_params)
        super().__init__(
            None,
            mgp_model=mgp_model,
            par=par,
            use_mapping=use_mapping,
            **kwargs,
        )

    def init_flare(self):
        """Create a fresh ``GaussianProcess`` from ``self.mlp_params``."""
        self.gp_model = GaussianProcess(**self.mlp_params)

    def init_species_map(self):
        """Map each distinct atomic number in the initial images to an index.

        Indices follow the sorted order returned by ``np.unique``.
        """
        self.species_map = {}

        collected = []
        for image in self.initial_images:
            collected.extend(np.unique(image.numbers).tolist())

        for code, number in enumerate(np.unique(collected)):
            self.species_map[number] = code

    # NOTE(review): the default for ``system_changes`` is the literal
    # Ellipsis object -- confirm whether a concrete default was intended.
    def calculate(self, atoms=None, properties=None, system_changes=...):
        """Run ``MLPCalc.calculate`` and then the next ``calculate`` in the MRO.

        Returns whatever the second call returns.
        """
        MLPCalc.calculate(
            self,
            atoms=atoms,
            properties=properties,
            system_changes=system_changes,
        )
        return super().calculate(
            atoms=atoms,
            properties=properties,
            system_changes=system_changes,
        )

    def calculate_gp(self, atoms):
        """Run the parent GP calculation and expose its uncertainties.

        ``stds`` is copied to ``force_stds`` and ``local_energy_stds`` to
        ``energy_stds`` in ``self.results``; ``atoms.info`` receives the
        energy stds and the NaN-ignoring maximum of the force stds.
        """
        descriptor = self.get_descriptor_from_atoms(atoms)
        super().calculate_gp(descriptor)

        results = self.results
        results["force_stds"] = results["stds"]
        results["energy_stds"] = results["local_energy_stds"]

        atoms.info["energy_stds"] = results["local_energy_stds"]
        atoms.info["max_force_stds"] = np.nanmax(results["force_stds"])

    def train(self, parent_dataset, new_dataset=None):
        """Train incrementally on ``new_dataset`` or from scratch.

        When a GP model already exists and ``new_dataset`` is truthy, only
        ``new_dataset`` is added. Otherwise the GP is re-created and trained
        on ``parent_dataset``.
        """
        if self.gp_model and new_dataset:
            self.train_on_dataset(new_dataset)
            return

        self.init_flare()
        self.train_on_dataset(parent_dataset)

    def train_on_dataset(self, dataset):
        """Add every atoms object in ``dataset`` to the GP database."""
        for atoms in dataset:
            descriptor = self.get_descriptor_from_atoms(
                atoms,
                energy=atoms.get_potential_energy(),
                forces=atoms.get_forces(),
            )
            self.gp_model.update_db(
                struc=descriptor,
                forces=atoms.get_forces(),
                energy=atoms.get_potential_energy(),
            )

    def get_descriptor_from_atoms(self, atoms, energy=None, forces=None):
        """Convert ``atoms`` to a ``Structure`` with mapped species codes.

        ``energy`` and ``forces`` are passed through to ``Structure``
        unchanged. An atomic number missing from ``species_map`` raises
        ``KeyError``.
        """
        cell = atoms.get_cell()
        coded_species = [
            self.species_map[number]
            for number in atoms.get_atomic_numbers()
        ]
        positions = atoms.get_positions()

        return Structure(
            cell=cell,
            species=coded_species,
            positions=positions,
            forces=forces,
            energy=energy,
        )