def test_training_statistics(): """ Ensure training statistics are being recorded correctly :return: """ test_structure, forces = get_random_structure(np.eye(3), ['H', 'Be'], 10) gp = GaussianProcess(kernel_name='2', cutoffs=[10]) data = gp.training_statistics assert data['N'] == 0 assert len(data['species']) == 0 assert len(data['envs_by_species']) == 0 gp.update_db(test_structure, forces) data = gp.training_statistics assert data['N'] == 10 assert len(data['species']) == len(set(test_structure.coded_species)) assert len(data['envs_by_species']) == len(set( test_structure.coded_species))
def two_body_gp() -> GaussianProcess:
    """Returns a GP instance with a three-body numba-based kernel"""
    print("\nSetting up...\n")

    # params
    cell = np.eye(3)
    unique_species = [2, 1]
    cutoffs = np.array([0.8, 0.8])
    noa = 5

    # create test structure
    test_structure, forces = get_random_structure(cell, unique_species, noa)

    # test update_db
    gaussian = \
        GaussianProcess(kernel=en.three_body,
                        kernel_grad=en.three_body_grad,
                        hyps=np.array([1, 1, 1]),
                        hyp_labels=['Length', 'Signal Var.', 'Noise Var.'],
                        par=True, no_cpus=2,
                        cutoffs=cutoffs)
    gaussian.update_db(test_structure, forces)

    # return gaussian
    yield gaussian

    # code after yield will be executed once all tests are run
    # this will not be run if an exception is raised in the setup
    print("\n\nTearing down\n")
    del gaussian
def two_plus_three_gp() -> GaussianProcess:
    """Returns a GP instance with a 2+3-body kernel."""

    cutoffs = np.ones(2) * 0.8
    hyps = np.array([1.0, 1.0, 1.0, 1.0, 1.0])

    # test update_db
    gpname = "2+3_mc"

    gp_model = GaussianProcess(
        kernel_name=gpname,
        hyps=hyps,
        cutoffs=cutoffs,
        multihyps=False,
        parallel=False,
        n_cpus=1,
    )

    test_structure, forces = get_random_structure(np.eye(3), [1, 2], 3)
    energy = 3.14

    gp_model.update_db(test_structure, forces, energy=energy)

    yield gp_model
    del gp_model
def test_training_statistics(): """ Ensure training statistics are being recorded correctly :return: """ test_structure, forces = get_random_structure(np.eye(3), ["H", "Be"], 10) energy = 3.14 gp = GaussianProcess(kernel_name="2", cutoffs=[10]) data = gp.training_statistics assert data["N"] == 0 assert len(data["species"]) == 0 assert len(data["envs_by_species"]) == 0 gp.update_db(test_structure, forces, energy=energy) data = gp.training_statistics assert data["N"] == 10 assert len(data["species"]) == len(set(test_structure.coded_species)) assert len(data["envs_by_species"]) == len( set(test_structure.coded_species))
def three_body_gp() -> GaussianProcess:
    """Returns a GP instance with a three-body numba-based kernel"""
    print("\nSetting up...\n")

    # params
    cell = np.eye(3)
    unique_species = [2, 1]
    cutoffs = np.array([0.8, 0.8])
    noa = 5

    nbond = 0
    ntriplet = 1
    hyps, hm = generate_hm(nbond, ntriplet)

    # create test structure
    test_structure, forces = get_random_structure(cell, unique_species, noa)

    # test update_db
    gaussian = \
        GaussianProcess(kernel=en.three_body_mc,
                        kernel_grad=en.three_body_mc_grad,
                        hyps=hyps,
                        hyp_labels=hm['hyps_label'],
                        cutoffs=cutoffs,
                        multihyps=True, hyps_mask=hm)
    gaussian.update_db(test_structure, forces)

    # return gaussian
    yield gaussian

    # code after yield will be executed once all tests are run
    # this will not be run if an exception is raised in the setup
    print("\n\nTearing down\n")
    del gaussian
def test_load_and_reload(self, all_gps, validation_env, multihyps):

    test_gp = all_gps[multihyps]

    test_gp.write_model('test_gp_write', 'pickle')

    new_gp = GaussianProcess.from_file('test_gp_write.pickle')
    for d in [0, 1, 2]:
        assert np.all(
            test_gp.predict(x_t=validation_env, d=d) == new_gp.predict(
                x_t=validation_env, d=d))
    os.remove('test_gp_write.pickle')

    test_gp.write_model('test_gp_write', 'json')
    with open('test_gp_write.json', 'r') as f:
        new_gp = GaussianProcess.from_dict(json.loads(f.readline()))
    for d in [0, 1, 2]:
        assert np.all(
            test_gp.predict(x_t=validation_env, d=d) == new_gp.predict(
                x_t=validation_env, d=d))
    os.remove('test_gp_write.json')

    with raises(ValueError):
        test_gp.write_model('test_gp_write', 'cucumber')
def predict_on_structure_en(
        structure: Structure, gp: GaussianProcess,
        n_cpus: int = None) -> ('np.ndarray', 'np.ndarray', 'np.ndarray'):
    """
    Return the forces/std. dev. uncertainty / local energy associated with
    each individual atom in a structure. Forces are stored directly to the
    structure and are also returned.

    :param structure: FLARE structure to obtain forces for, with N atoms
    :param gp: Gaussian Process model
    :param n_cpus: Dummy parameter passed as an argument to allow for
        flexibility when the callable may or may not be parallelized
    :return: N x 3 array of forces, N x 3 array of uncertainties,
        N-length array of energies
    :rtype: (np.ndarray, np.ndarray, np.ndarray)
    """
    # Set up local energy array (use a float array so predicted energies
    # are not truncated to integers)
    local_energies = np.zeros(structure.nat)

    # Loop through atoms in structure and predict forces, uncertainties,
    # and energies
    for n in range(structure.nat):
        chemenv = AtomicEnvironment(structure, n, gp.cutoffs)
        for i in range(3):
            force, var = gp.predict(chemenv, i + 1)
            structure.forces[n][i] = float(force)
            structure.stds[n][i] = np.sqrt(np.abs(var))
        local_energies[n] = gp.predict_local_energy(chemenv)

    forces = np.array(structure.forces)
    stds = np.array(structure.stds)

    return forces, stds, local_energies
def predict_on_structure_en(
        structure: Structure, gp: GaussianProcess,
        n_cpus: int = None,
        write_to_structure: bool = True,
        selective_atoms: List[int] = None,
        skipped_atom_value=0) -> ('np.ndarray', 'np.ndarray', 'np.ndarray'):
    """
    Return the forces/std. dev. uncertainty / local energy associated with
    each individual atom in a structure. Forces are stored directly to the
    structure and are also returned.

    :param structure: FLARE structure to obtain forces for, with N atoms
    :param gp: Gaussian Process model
    :param n_cpus: Dummy parameter passed as an argument to allow for
        flexibility when the callable may or may not be parallelized
    :param write_to_structure: Write results to structure's forces,
        std attributes
    :param selective_atoms: Only predict on these atoms; e.g. [0,1,2] will
        only predict and return for those atoms
    :param skipped_atom_value: What value to use for atoms that are skipped.
        Defaults to 0 but other options could be e.g. NaN. Will NOT
        write this to the structure if write_to_structure is True.
    :return: N x 3 array of forces, N x 3 array of uncertainties,
        N-length array of energies
    :rtype: (np.ndarray, np.ndarray, np.ndarray)
    """

    # Set up output arrays
    forces = np.zeros((structure.nat, 3))
    stds = np.zeros((structure.nat, 3))
    local_energies = np.zeros(structure.nat)

    if selective_atoms:
        forces.fill(skipped_atom_value)
        stds.fill(skipped_atom_value)
        local_energies.fill(skipped_atom_value)
    else:
        selective_atoms = []

    # Loop through atoms in structure and predict forces, uncertainties,
    # and energies
    for n in range(structure.nat):
        if selective_atoms and n not in selective_atoms:
            continue

        chemenv = AtomicEnvironment(structure, n, gp.cutoffs,
                                    cutoffs_mask=gp.hyps_mask)

        for i in range(3):
            force, var = gp.predict(chemenv, i + 1)
            forces[n][i] = float(force)
            stds[n][i] = np.sqrt(np.abs(var))

            if write_to_structure and structure.forces is not None:
                structure.forces[n][i] = float(force)
                structure.stds[n][i] = np.sqrt(np.abs(var))

        local_energies[n] = gp.predict_local_energy(chemenv)

    return forces, stds, local_energies
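# Illustrative usage sketch (not part of the original source): selective
# per-atom energy/force prediction with predict_on_structure_en, without
# writing results back to the structure. The `structure` and `gp` arguments
# are assumed to come from helpers such as get_random_structure and get_gp
# shown elsewhere in these snippets.
def example_selective_energy_prediction(structure, gp):
    # Only atoms 0 and 2 are predicted; skipped entries are filled with NaN.
    forces, stds, local_energies = predict_on_structure_en(
        structure, gp,
        write_to_structure=False,
        selective_atoms=[0, 2],
        skipped_atom_value=np.nan)
    return forces, stds, local_energies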
def test_seed_and_run():
    the_gp = GaussianProcess(
        kernel_name="2+3_mc",
        hyps=np.array([
            3.75996759e-06,
            1.53990678e-02,
            2.50624782e-05,
            5.07884426e-01,
            1.70172923e-03,
        ]),
        cutoffs=np.array([5, 3]),
        hyp_labels=["l2", "s2", "l3", "s3", "n0"],
        maxiter=1,
        opt_algorithm="L-BFGS-B",
    )

    with open(path.join(TEST_FILE_DIR, "methanol_frames.json"), "r") as f:
        frames = [Structure.from_dict(loads(s)) for s in f.readlines()]

    with open(path.join(TEST_FILE_DIR, "methanol_envs.json"), "r") as f:
        data_dicts = [loads(s) for s in f.readlines()[:6]]
        envs = [AtomicEnvironment.from_dict(d) for d in data_dicts]
        forces = [np.array(d["forces"]) for d in data_dicts]
        seeds = list(zip(envs, forces))

    tt = TrajectoryTrainer(
        frames,
        gp=the_gp,
        shuffle_frames=True,
        rel_std_tolerance=0,
        abs_std_tolerance=0,
        skip=10,
        pre_train_seed_envs=seeds,
        pre_train_seed_frames=[frames[-1]],
        max_atoms_from_frame=4,
        output_name="meth_test",
        model_format="pickle",
        train_checkpoint_interval=1,
        pre_train_atoms_per_element={"H": 1},
    )

    tt.run()

    with open("meth_test_model.pickle", "rb") as f:
        new_gp = pickle.load(f)

    test_env = envs[0]

    for d in [1, 2, 3]:
        assert np.all(
            the_gp.predict(x_t=test_env, d=d) ==
            new_gp.predict(x_t=test_env, d=d))

    for f in glob("meth_test*"):
        remove(f)
def predict_on_atom_en(structure: Structure, atom: int, gp: GaussianProcess):
    """Predict the force components, their uncertainties, and the local
    energy for a single atom in a structure."""
    chemenv = AtomicEnvironment(structure, atom, gp.cutoffs)
    comps = []
    stds = []

    # predict force components and standard deviations
    for i in range(3):
        force, var = gp.predict(chemenv, i + 1)
        comps.append(float(force))
        stds.append(np.sqrt(np.abs(var)))

    # predict local energy
    local_energy = gp.predict_local_energy(chemenv)
    return comps, stds, local_energy
def test_seed_and_run():
    the_gp = GaussianProcess(kernel=two_plus_three_body_mc,
                             kernel_grad=two_plus_three_body_mc_grad,
                             hyps=np.array([3.75996759e-06, 1.53990678e-02,
                                            2.50624782e-05, 5.07884426e-01,
                                            1.70172923e-03]),
                             cutoffs=np.array([7, 7]),
                             hyp_labels=['l2', 's2', 'l3', 's3', 'n0'],
                             maxiter=1,
                             opt_algorithm='L-BFGS-B')

    with open('./test_files/methanol_frames.json', 'r') as f:
        frames = [Structure.from_dict(loads(s)) for s in f.readlines()]

    with open('./test_files/methanol_envs.json', 'r') as f:
        data_dicts = [loads(s) for s in f.readlines()[:6]]
        envs = [AtomicEnvironment.from_dict(d) for d in data_dicts]
        forces = [np.array(d['forces']) for d in data_dicts]
        seeds = list(zip(envs, forces))

    tt = TrajectoryTrainer(frames,
                           gp=the_gp,
                           shuffle_frames=True,
                           rel_std_tolerance=0,
                           abs_std_tolerance=0,
                           skip=15,
                           pre_train_seed_envs=seeds,
                           pre_train_seed_frames=[frames[-1]],
                           max_atoms_from_frame=4,
                           model_write='meth_test.pickle',
                           model_format='pickle',
                           checkpoint_interval=1,
                           pre_train_atoms_per_element={'H': 1})

    tt.run()

    with open('meth_test.pickle', 'rb') as f:
        new_gp = pickle.load(f)

    test_env = envs[0]

    for d in [0, 1, 2]:
        assert np.all(
            the_gp.predict(x_t=test_env, d=d) ==
            new_gp.predict(x_t=test_env, d=d))

    os.system('rm ./gp_from_aimd.out')
    os.system('rm ./gp_from_aimd.xyz')
    os.system('rm ./gp_from_aimd-f.xyz')
    os.system('rm ./meth_test.pickle')
def predict_on_structure_en(structure: Structure, gp: GaussianProcess):
    """Predict forces, uncertainties, and local energies for every atom in a
    structure, writing forces and uncertainties back to the structure."""
    local_energies = [0 for _ in range(structure.nat)]

    for n in range(structure.nat):
        chemenv = AtomicEnvironment(structure, n, gp.cutoffs)
        for i in range(3):
            force, var = gp.predict(chemenv, i + 1)
            structure.forces[n][i] = float(force)
            structure.stds[n][i] = np.sqrt(np.abs(var))
        local_energies[n] = gp.predict_local_energy(chemenv)

    forces = np.array(structure.forces)
    stds = np.array(structure.stds)
    return forces, stds, local_energies
def restart(self):
    # Recover atomic configuration: positions, velocities, forces
    positions, self.nsteps = self.read_frame('positions.xyz', -1)
    self.atoms.set_positions(positions)
    self.atoms.set_velocities(self.read_frame('velocities.dat', -1)[0])
    self.atoms.calc.results['forces'] = self.read_frame('forces.dat', -1)[0]
    print('Last frame recovered')

    # # Recover training data set
    # gp_model = self.atoms.calc.gp_model
    # atoms = deepcopy(self.atoms)
    # nat = len(self.atoms.positions)
    # dft_positions = self.read_all_frames('dft_positions.xyz', nat)
    # dft_forces = self.read_all_frames('dft_forces.dat', nat)
    # added_atoms = self.read_all_frames('added_atoms.dat', 1, 1, 'int')
    # for i, frame in enumerate(dft_positions):
    #     atoms.set_positions(frame)
    #     curr_struc = Structure.from_ase_atoms(atoms)
    #     gp_model.update_db(curr_struc, dft_forces[i], added_atoms[i].tolist())
    # gp_model.set_L_alpha()
    # print('GP training set ready')

    # Recover FLARE calculator
    self.atoms.calc.gp_model = GaussianProcess.from_file(
        self.restart_from + '/gp_model.pickle')
    # gp_model.ky_mat_inv = np.load(self.restart_from+'/ky_mat_inv.npy')
    # gp_model.alpha = np.load(self.restart_from+'/alpha.npy')

    if self.atoms.calc.use_mapping:
        for map_3 in self.atoms.calc.mgp_model.maps_3:
            map_3.load_grid = self.restart_from + '/'
        self.atoms.calc.build_mgp(skip=False)
        self.atoms.calc.mgp_updated = True
    print('GP and MGP ready')

    self.l_bound = 10
def test_load_one_frame_and_run():
    the_gp = GaussianProcess(
        kernel_name="2+3_mc",
        hyps=np.array([
            3.75996759e-06,
            1.53990678e-02,
            2.50624782e-05,
            5.07884426e-01,
            1.70172923e-03,
        ]),
        cutoffs=np.array([5, 3]),
        hyp_labels=["l2", "s2", "l3", "s3", "n0"],
        maxiter=1,
        opt_algorithm="L-BFGS-B",
    )

    with open(path.join(TEST_FILE_DIR, "methanol_frames.json"), "r") as f:
        frames = [Structure.from_dict(loads(s)) for s in f.readlines()]

    tt = TrajectoryTrainer(
        frames,
        gp=the_gp,
        shuffle_frames=True,
        print_as_xyz=True,
        rel_std_tolerance=0,
        abs_std_tolerance=0,
        skip=15,
    )

    tt.run()

    for f in glob("gp_from_aimd*"):
        remove(f)
def test_load_reload_huge(self, all_gps):
    """
    Unit tests that loading and reloading a huge GP works.
    :param all_gps:
    :return:
    """
    test_gp = deepcopy(all_gps[False])
    test_gp.set_L_alpha()
    dummy_gp = deepcopy(test_gp)

    N_data = len(dummy_gp.training_data)
    prev_ky_mat = deepcopy(dummy_gp.ky_mat)
    prev_l_mat = deepcopy(dummy_gp.l_mat)

    for model_format in ["pickle", "json"]:
        dummy_gp.write_model("test_gp_write", model_format, N_data - 1)
        new_gp = GaussianProcess.from_file(f"test_gp_write.{model_format}")
        assert np.allclose(prev_ky_mat, new_gp.ky_mat)
        assert np.allclose(prev_l_mat, new_gp.l_mat)
        assert new_gp.training_data is not test_gp.training_data
        os.remove(f"test_gp_write.{model_format}")
        dummy_gp = deepcopy(test_gp)

    os.remove("test_gp_write_ky_mat.npy")
def all_gps() -> GaussianProcess:
    """Returns GP instances (keyed by the multihyps flag) fit to a small
    random structure."""

    gp_dict = {True: None, False: None}
    for multihyps in multihyps_list:
        hyps, hm, cutoffs = generate_hm(1, 1, multihyps=multihyps)
        hl = hm["hyp_labels"]

        # test update_db
        gp_dict[multihyps] = GaussianProcess(
            kernels=hm["kernels"],
            hyps=hyps,
            hyp_labels=hl,
            cutoffs=cutoffs,
            hyps_mask=hm,
            parallel=False,
            n_cpus=1,
        )

        test_structure, forces = get_random_structure(np.eye(3), [1, 2], 3)
        energy = 3.14

        gp_dict[multihyps].update_db(test_structure, forces, energy=energy)

    yield gp_dict
    del gp_dict
def test_serialization_method(two_body_gp, test_point):
    """
    Serialize and then un-serialize a GP and ensure that no info was lost.
    Compare one calculation to ensure predictions work correctly.
    :param two_body_gp:
    :return:
    """
    old_gp_dict = two_body_gp.as_dict()
    new_gp = GaussianProcess.from_dict(old_gp_dict)
    new_gp_dict = new_gp.as_dict()

    assert len(new_gp_dict) == len(old_gp_dict)

    # Compare each entry of the round-tripped dict against the original
    for k1, k2 in zip(sorted(new_gp_dict.keys()), sorted(old_gp_dict.keys())):
        x = new_gp_dict[k1]
        y = old_gp_dict[k2]

        if isinstance(x, np.ndarray):
            assert np.equal(x, y).all()
        elif hasattr(x, '__len__'):
            if isinstance(x[0], np.ndarray):
                assert np.equal(x, y).all()
            else:
                for xx, yy in zip(x, y):
                    assert xx == yy
        else:
            assert x == y

    for d in [0, 1, 2]:
        assert np.all(
            two_body_gp.predict(x_t=test_point, d=d) == new_gp.predict(
                x_t=test_point, d=d))
def test_load_one_frame_and_run():
    the_gp = GaussianProcess(kernel=two_plus_three_body_mc,
                             kernel_grad=two_plus_three_body_mc_grad,
                             hyps=np.array([3.75996759e-06, 1.53990678e-02,
                                            2.50624782e-05, 5.07884426e-01,
                                            1.70172923e-03]),
                             cutoffs=np.array([7, 7]),
                             hyp_labels=['l2', 's2', 'l3', 's3', 'n0'],
                             maxiter=1,
                             opt_algorithm='L-BFGS-B')

    with open('./test_files/methanol_frames.json', 'r') as f:
        frames = [Structure.from_dict(loads(s)) for s in f.readlines()]

    tt = TrajectoryTrainer(frames,
                           gp=the_gp,
                           shuffle_frames=True,
                           rel_std_tolerance=0,
                           abs_std_tolerance=0,
                           skip=15)

    tt.run()

    os.system('rm ./gp_from_aimd.gp')
    os.system('rm ./gp_from_aimd.out')
    os.system('rm ./gp_from_aimd.xyz')
    os.system('rm ./gp_from_aimd-f.xyz')
def predict_on_structure(structure: Structure, gp: GaussianProcess,
                         n_cpus: int = None) -> ('np.ndarray', 'np.ndarray'):
    """
    Return the forces/std. dev. uncertainty associated with each
    individual atom in a structure. Forces are stored directly to the
    structure and are also returned.

    :param structure: FLARE structure to obtain forces for, with N atoms
    :param gp: Gaussian Process model
    :return: N x 3 numpy array of forces, N x 3 numpy array of uncertainties
    :rtype: (np.ndarray, np.ndarray)
    """
    # Loop through individual atoms, cast to atomic environments,
    # make predictions
    for n in range(structure.nat):
        chemenv = AtomicEnvironment(structure, n, gp.cutoffs)
        for i in range(3):
            force, var = gp.predict(chemenv, i + 1)
            structure.forces[n][i] = float(force)
            structure.stds[n][i] = np.sqrt(np.abs(var))

    forces = np.array(structure.forces)
    stds = np.array(structure.stds)

    return forces, stds
def test_permuted_initialization(self):
    """
    Run through some common permutations of input sequences to ensure
    that the GP correctly initializes.
    """
    for kernel_list in [["2"], ["3"], ["2", "3"]]:
        GaussianProcess(kernels=kernel_list)

    with raises(ValueError):
        GaussianProcess(kernels=["2", "3", "mb"])

    full_kernel_list = ["2", "3"]

    for component in ["sc", "mc"]:
        GaussianProcess(kernels=full_kernel_list, component=component)

    for parallel in [True, False]:
        GaussianProcess(parallel=parallel)

    for per_atom_par in [True, False]:
        GaussianProcess(per_atom_par=per_atom_par)
def all_gps() -> GaussianProcess:
    """Returns GP instances (keyed by the multihyps flag) using a
    2+3+many-body kernel."""

    gp_dict = {True: None, False: None}
    for multihyps in multihyps_list:

        hyps, hm, _ = generate_hm(1, 1, multihyps=multihyps)
        hl = hm['hyps_label']
        if (multihyps is False):
            hm = None

        # test update_db
        gpname = '2+3+mb_mc'
        hyps = np.hstack([hyps, [1, 1]])
        hl = np.hstack([hl[:-1], ['sigm', 'lsm'], hl[-1]])
        cutoffs = np.ones(3) * 0.8

        gp_dict[multihyps] = \
            GaussianProcess(kernel_name=gpname,
                            hyps=hyps,
                            hyp_labels=hl,
                            cutoffs=cutoffs,
                            multihyps=multihyps, hyps_mask=hm,
                            parallel=False, n_cpus=1)

        test_structure, forces = get_random_structure(np.eye(3), [1, 2], 3)

        gp_dict[multihyps].update_db(test_structure, forces)

    yield gp_dict
    del gp_dict
def test_load_and_reload(two_body_gp, test_point):

    two_body_gp.write_model('two_body', 'pickle')

    with open('two_body.pickle', 'rb') as f:
        new_gp = pickle.load(f)

    for d in [0, 1, 2]:
        assert np.all(
            two_body_gp.predict(x_t=test_point, d=d) == new_gp.predict(
                x_t=test_point, d=d))

    os.remove('two_body.pickle')

    two_body_gp.write_model('two_body', 'json')

    with open('two_body.json', 'r') as f:
        new_gp = GaussianProcess.from_dict(json.loads(f.readline()))

    for d in [0, 1, 2]:
        assert np.all(
            two_body_gp.predict(x_t=test_point, d=d) == new_gp.predict(
                x_t=test_point, d=d))

    os.remove('two_body.json')

    with raises(ValueError):
        two_body_gp.write_model('two_body', 'cucumber')
def test_otf_parser_from_checkpt(software):
    if not os.environ.get(cmd[software], False):
        pytest.skip(f"{cmd[software]} not found in environment:"
                    " Please install the code "
                    f" and set the {cmd[software]} env. "
                    "variable to point to the executable.")

    if software == "cp2k":
        pytest.skip()

    example = 1
    casename = name_list[example]
    log_name = f"{casename}_otf_{software}"
    output_name = f"{log_name}.out"

    otf_traj = OtfAnalysis(output_name)
    try:
        replicated_gp = otf_traj.make_gp()
    except:
        init_gp = GaussianProcess.from_file(log_name + "_gp.json")
        replicated_gp = otf_traj.make_gp(init_gp=init_gp)

    outdir = f"test_outputs_{software}"
    if not os.path.isdir(outdir):
        os.mkdir(outdir)
    for f in os.listdir("./"):
        if f"{casename}_otf_{software}" in f:
            shutil.move(f, outdir)
    cleanup(software, f"{casename}_otf_{software}")
def get_gp(
    bodies,
    kernel_type="mc",
    multihyps=True,
    cellabc=[1, 1, 1.5],
    force_only=False,
    noa=5,
) -> GaussianProcess:
    """Returns a GP instance whose kernel is determined by the `bodies`
    argument (e.g. "2", "3", or "2+3")."""
    print("\nSetting up...\n")

    # params
    cell = np.diag(cellabc)
    unique_species = [1, 2]

    ntwobody = 0
    nthreebody = 0
    prefix = bodies
    if "2" in bodies or "two" in bodies:
        ntwobody = 1
    if "3" in bodies or "three" in bodies:
        nthreebody = 1

    hyps, hm, _ = generate_hm(ntwobody, nthreebody, nmanybody=0,
                              multihyps=multihyps)
    cutoffs = hm["cutoffs"]
    kernels = hm["kernels"]
    hl = hm["hyp_labels"]

    # create test structure
    test_structure, forces = get_random_structure(cell, unique_species, noa)
    energy = 3.14

    # test update_db
    gaussian = GaussianProcess(
        kernels=kernels,
        component=kernel_type,
        hyps=hyps,
        hyp_labels=hl,
        cutoffs=cutoffs,
        hyps_mask=hm,
        parallel=False,
        n_cpus=1,
    )
    if force_only:
        gaussian.update_db(test_structure, forces)
    else:
        gaussian.update_db(test_structure, forces, energy=energy)

    gaussian.check_L_alpha()
    # print(gaussian.alpha)

    return gaussian
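# Illustrative usage sketch (not part of the original source): building a
# small force+energy-trained 2+3-body GP with the helper above. The keyword
# values are examples rather than canonical defaults.
def example_build_test_gp():
    gp = get_gp("2+3", kernel_type="mc", multihyps=True,
                cellabc=[1, 1, 1.5], force_only=False, noa=5)
    return gp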
def test_pred_on_elements():
    the_gp = GaussianProcess(kernel_name="2+3_mc",
                             hyps=np.array([3.75996759e-06, 1.53990678e-02,
                                            2.50624782e-05, 5.07884426e-01,
                                            1.70172923e-03]),
                             cutoffs=np.array([7, 3]),
                             hyp_labels=['l2', 's2', 'l3', 's3', 'n0'],
                             maxiter=1,
                             opt_algorithm='L-BFGS-B')

    with open('./test_files/methanol_frames.json', 'r') as f:
        frames = [Structure.from_dict(loads(s)) for s in f.readlines()]

    with open('./test_files/methanol_envs.json', 'r') as f:
        data_dicts = [loads(s) for s in f.readlines()[:6]]
        envs = [AtomicEnvironment.from_dict(d) for d in data_dicts]
        forces = [np.array(d['forces']) for d in data_dicts]
        seeds = list(zip(envs, forces))

    all_frames = deepcopy(frames)
    tt = TrajectoryTrainer(frames,
                           gp=the_gp,
                           shuffle_frames=False,
                           rel_std_tolerance=0,
                           abs_std_tolerance=0,
                           abs_force_tolerance=.001,
                           skip=5,
                           min_atoms_per_train=100,
                           pre_train_seed_envs=seeds,
                           pre_train_seed_frames=[frames[-1]],
                           max_atoms_from_frame=4,
                           output_name='meth_test',
                           model_format='json',
                           atom_checkpoint_interval=50,
                           pre_train_atoms_per_element={'H': 1},
                           predict_atoms_per_element={'H': 0,
                                                      'C': 1,
                                                      'O': 0})
    # Set to predict only on Carbon after training on H to ensure errors are
    # high and that they get added to the gp
    tt.run()

    # Ensure forces weren't written directly to structure
    for i in range(len(all_frames)):
        assert np.array_equal(all_frames[i].forces, frames[i].forces)

    # Assert that Carbon atoms were correctly added
    assert the_gp.training_statistics['envs_by_species']['C'] > 2

    for f in glob("meth_test*"):
        remove(f)
    for f in glob("gp_from_aimd*"):
        remove(f)
def two_plus_three_gp() -> GaussianProcess:
    """Returns a GP instance with a 2+3-body kernel."""

    cutoffs = {'twobody': 0.8, 'threebody': 0.8}
    hyps = np.array([1., 1., 1., 1., 1.])

    gp_model = \
        GaussianProcess(kernels=['twobody', 'threebody'],
                        hyps=hyps,
                        cutoffs=cutoffs,
                        multihyps=False,
                        parallel=False,
                        n_cpus=1)

    test_structure, forces = \
        get_random_structure(np.eye(3), [1, 2], 3)
    energy = 3.14

    gp_model.update_db(test_structure, forces, energy=energy)

    yield gp_model
    del gp_model
def test_constrained_optimization_simple():
    """
    Test constrained hyperparameter optimization with a 2+3-body
    multi-component kernel, where a hyperparameter mask restricts training
    to a subset of the hyperparameters.
    :return:
    """

    # params
    cell = np.eye(3)
    species = [1, 1, 2, 2, 2]
    positions = np.random.uniform(0, 1, (5, 3))
    forces = np.random.uniform(0, 1, (5, 3))

    two_species_structure = Structure(cell=cell, species=species,
                                      positions=positions, forces=forces)

    hyp_labels = ['2-Body_sig2', '2-Body_l2',
                  '3-Body_sig2', '3-Body_l2',
                  'noise']
    hyps = np.array([1.2, 2.2, 3.2, 4.2, 12.])
    cutoffs = np.array((.8, .8))

    # Define hyp masks
    spec_mask = np.zeros(118, dtype=int)
    spec_mask[1] = 1

    hyps_mask = {'nspec': 2,
                 'spec_mask': spec_mask,
                 'nbond': 2,
                 'bond_mask': [0, 1, 1, 1],
                 'ntriplet': 2,
                 'triplet_mask': [0, 1, 1, 1, 1, 1, 1, 1],
                 'original': np.array([1.1, 1.2, 2.1, 2.2, 3.1, 3.2,
                                       4.1, 4.2, 12.]),
                 'train_noise': True,
                 'map': [1, 3, 5, 7, 8]}

    gp = GaussianProcess(kernel=en.two_plus_three_body_mc,
                         kernel_grad=en.two_plus_three_body_mc_grad,
                         hyps=hyps,
                         hyp_labels=hyp_labels,
                         cutoffs=cutoffs, par=False, n_cpus=1,
                         hyps_mask=hyps_mask,
                         maxiter=1, multihyps=True)
    gp.update_db(two_species_structure, two_species_structure.forces)

    # Check that the hyperparameters were updated
    results = gp.train()
    assert not np.equal(results.x, hyps).all()
def __init__(
    self,
    n_experts: int = 1,
    ndata_per_expert: int = 200,
    prior_variance: float = 0.5,
    per_expert_parallel: bool = True,
    **kwargs,
):
    """Initialize a multi-expert GP; keyword arguments beyond the expert
    settings are forwarded to GaussianProcess.__init__."""

    self.n_experts = n_experts
    self.prior_variance = prior_variance
    self.log_prior_var = np.log(prior_variance)
    self.ndata_per_expert = ndata_per_expert
    self.per_expert_parallel = per_expert_parallel

    GaussianProcess.__init__(self, **kwargs)

    # Index of which expert is currently addressed
    self.current_expert = 0
    self.reset_container()
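# Illustrative usage sketch (not part of the original source): because the
# constructor above forwards **kwargs to GaussianProcess.__init__, an
# expert-split model can be configured with the same keywords used for plain
# GPs elsewhere in these snippets. `ExpertGP` is a placeholder name for
# whatever class this __init__ belongs to.
#
# model = ExpertGP(n_experts=2, ndata_per_expert=100, prior_variance=0.5,
#                  kernels=['twobody', 'threebody'],
#                  cutoffs={'twobody': 0.8, 'threebody': 0.8},
#                  hyps=np.array([1., 1., 1., 1., 1.]))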
def test_otf_al(): """ Test that an otf run can survive going for more steps :return: """ os.system('cp ./test_files/qe_input_2.in ./pwscf.in') # make gp model kernel = en.three_body kernel_grad = en.three_body_grad hyps = np.array([0.1, 1, 0.01]) hyp_labels = ['Signal Std', 'Length Scale', 'Noise Std'] cutoffs = np.array([3.9, 3.9]) energy_force_kernel = en.three_body_force_en gp = \ GaussianProcess(kernel=kernel, kernel_grad=kernel_grad, hyps=hyps, cutoffs=cutoffs, hyp_labels=hyp_labels, energy_force_kernel=energy_force_kernel, maxiter=50) # set up DFT calculator qe_input = './pwscf.in' # quantum espresso input file dft_loc = os.environ.get('PWSCF_COMMAND') # set up OTF parameters dt = 0.001 # timestep (ps) number_of_steps = 100 # number of steps std_tolerance_factor = 1 max_atoms_added = 2 freeze_hyps = 3 otf = OTF(qe_input, dt, number_of_steps, gp, dft_loc, std_tolerance_factor, init_atoms=[0], calculate_energy=True, output_name='al_otf_qe', freeze_hyps=freeze_hyps, skip=5, max_atoms_added=max_atoms_added) # run OTF MD otf.run() os.system('mkdir test_outputs') os.system('mv al_otf_qe* test_outputs') cleanup_espresso_run()
def predict_on_structure(
    structure: Structure,
    gp: GaussianProcess,
    n_cpus: int = None,
    write_to_structure: bool = True,
    selective_atoms: List[int] = None,
    skipped_atom_value=0,
) -> ("np.ndarray", "np.ndarray"):
    """
    Return the forces/std. dev. uncertainty associated with each
    individual atom in a structure. Forces are stored directly to the
    structure and are also returned.

    :param structure: FLARE structure to obtain forces for, with N atoms
    :param gp: Gaussian Process model
    :param write_to_structure: Write results to structure's forces,
        std attributes
    :param selective_atoms: Only predict on these atoms; e.g. [0,1,2] will
        only predict and return for those atoms
    :param skipped_atom_value: What value to use for atoms that are skipped.
        Defaults to 0 but other options could be e.g. NaN. Will NOT
        write this to the structure if write_to_structure is True.
    :return: N x 3 numpy array of forces, N x 3 numpy array of uncertainties
    :rtype: (np.ndarray, np.ndarray)
    """

    forces = np.zeros((structure.nat, 3))
    stds = np.zeros((structure.nat, 3))

    if selective_atoms:
        forces.fill(skipped_atom_value)
        stds.fill(skipped_atom_value)
    else:
        selective_atoms = []

    for n in range(structure.nat):
        # Skip the atoms which we aren't predicting on if
        # selective atoms is on.
        if n not in selective_atoms and selective_atoms:
            continue

        chemenv = AtomicEnvironment(structure, n, gp.cutoffs,
                                    cutoffs_mask=gp.hyps_mask)

        force, var = gp.predict_force_xyz(chemenv)
        std = np.sqrt(np.abs(var))

        forces[n] = force
        stds[n] = std

        if write_to_structure:
            structure.forces[n] = force
            structure.stds[n] = std

    return forces, stds
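# Illustrative usage sketch (not part of the original source): full-structure
# force/uncertainty prediction with results written back to the structure.
# The `structure` and `gp` arguments are assumed to come from helpers such as
# get_random_structure and get_gp in these snippets.
def example_full_structure_prediction(structure, gp):
    forces, stds = predict_on_structure(structure, gp,
                                        write_to_structure=True)
    return forces, stds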