def predict_on_structure_en(
        structure: Structure, gp: GaussianProcess, n_cpus: int = None
) -> ('np.ndarray', 'np.ndarray', 'np.ndarray'):
    """
    Return the forces/std. dev. uncertainty / local energy associated with
    each individual atom in a structure. Forces and uncertainties are stored
    directly to the structure and are also returned.

    :param structure: FLARE structure to obtain forces for, with N atoms
    :param gp: Gaussian Process model
    :param n_cpus: Dummy parameter passed as an argument to allow for
        flexibility when the callable may or may not be parallelized
    :return: N x 3 array of forces, N x 3 array of uncertainties,
        N-length array of energies
    :rtype: (np.ndarray, np.ndarray, np.ndarray)
    """
    # Allocate a float array. Building the array from integer zeros would
    # produce an integer dtype and silently truncate every local energy
    # assigned below.
    local_energies = np.zeros(structure.nat)

    # Loop through atoms in structure and predict forces, uncertainties,
    # and energies
    for n in range(structure.nat):
        chemenv = AtomicEnvironment(structure, n, gp.cutoffs)

        # gp.predict takes the force component as 1, 2, 3
        for i in range(3):
            force, var = gp.predict(chemenv, i + 1)
            structure.forces[n][i] = float(force)
            # abs() guards the square root against a negative variance
            structure.stds[n][i] = np.sqrt(np.abs(var))

        local_energies[n] = gp.predict_local_energy(chemenv)

    forces = np.array(structure.forces)
    stds = np.array(structure.stds)

    return forces, stds, local_energies
def predict_on_structure(structure: Structure, gp: GaussianProcess,
                         n_cpus: int = None) -> ('np.ndarray', 'np.ndarray'):
    """
    Predict the force components and their standard deviations for every
    atom of a structure. The predictions are written into the structure's
    ``forces`` and ``stds`` attributes and returned as arrays.

    :param structure: FLARE structure to obtain forces for, with N atoms
    :param gp: Gaussian Process model
    :param n_cpus: Accepted but not used by this serial implementation
    :return: N x 3 numpy array of forces, N x 3 numpy array of uncertainties
    :rtype: (np.ndarray, np.ndarray)
    """
    # One atomic environment per atom; one GP query per force component.
    for atom_index in range(structure.nat):
        environment = AtomicEnvironment(structure, atom_index, gp.cutoffs)

        for axis, component in enumerate((1, 2, 3)):
            mean, variance = gp.predict(environment, component)
            structure.forces[atom_index][axis] = float(mean)
            # abs() guards the square root against a negative variance
            structure.stds[atom_index][axis] = np.sqrt(np.abs(variance))

    return np.array(structure.forces), np.array(structure.stds)
def predict_on_structure_en(
        structure: Structure,
        gp: GaussianProcess,
        n_cpus: int = None,
        write_to_structure: bool = True,
        selective_atoms: List[int] = None,
        skipped_atom_value=0
) -> ('np.ndarray', 'np.ndarray', 'np.ndarray'):
    """
    Return the forces/std. dev. uncertainty / local energy associated with
    each individual atom in a structure. The results are always returned;
    they are additionally written to the structure when requested.

    :param structure: FLARE structure to obtain forces for, with N atoms
    :param gp: Gaussian Process model
    :param n_cpus: Dummy parameter passed as an argument to allow for
        flexibility when the callable may or may not be parallelized
    :param write_to_structure: Write forces and uncertainties to the
        structure's forces / stds attributes (only done when
        structure.forces is not None)
    :param selective_atoms: Only predict on these atom indices. If None or
        empty, predictions are made for every atom
    :param skipped_atom_value: Value placed in the returned arrays for atoms
        that are skipped. It is never written to the structure
    :return: N x 3 array of forces, N x 3 array of uncertainties,
        N-length array of energies
    :rtype: (np.ndarray, np.ndarray, np.ndarray)
    """
    # Set up the output arrays (allocated exactly once)
    forces = np.zeros((structure.nat, 3))
    stds = np.zeros((structure.nat, 3))
    local_energies = np.zeros(structure.nat)

    if selective_atoms:
        forces.fill(skipped_atom_value)
        stds.fill(skipped_atom_value)
        local_energies.fill(skipped_atom_value)
        # A set gives constant-time membership tests inside the atom loop
        selected = set(selective_atoms)
    else:
        # None means "no filtering": predict on every atom
        selected = None

    # Loop through atoms in structure and predict forces, uncertainties,
    # and energies
    for n in range(structure.nat):
        if selected is not None and n not in selected:
            continue

        chemenv = AtomicEnvironment(structure, n, gp.cutoffs,
                                    cutoffs_mask=gp.hyps_mask)

        # gp.predict takes the force component as 1, 2, 3
        for i in range(3):
            force, var = gp.predict(chemenv, i + 1)
            force_value = float(force)
            # abs() guards the square root against a negative variance
            std_value = np.sqrt(np.abs(var))

            forces[n][i] = force_value
            stds[n][i] = std_value

            if write_to_structure and structure.forces is not None:
                structure.forces[n][i] = force_value
                structure.stds[n][i] = std_value

        local_energies[n] = gp.predict_local_energy(chemenv)

    return forces, stds, local_energies
def predict_on_structure(structure: Structure, gp: GaussianProcess):
    """
    Predict forces and standard deviations for each atom of a structure.

    The predictions are written into ``structure.forces`` and
    ``structure.stds`` in place, then both are returned as numpy arrays.

    :param structure: Structure whose atoms are predicted on
    :param gp: Gaussian Process model used for the predictions
    :return: array built from structure.forces, array built from
        structure.stds
    """
    components = (1, 2, 3)  # component labels passed to gp.predict

    for atom_index in range(structure.nat):
        environment = AtomicEnvironment(structure, atom_index, gp.cutoffs)

        for axis, component in enumerate(components):
            mean, variance = gp.predict(environment, component)
            structure.forces[atom_index][axis] = float(mean)
            # abs() guards the square root against a negative variance
            structure.stds[atom_index][axis] = np.sqrt(np.abs(variance))

    predicted_forces = np.array(structure.forces)
    predicted_stds = np.array(structure.stds)
    return predicted_forces, predicted_stds
def test_seed_and_run():
    """
    Train a GP through TrajectoryTrainer with seed environments and a seed
    frame, reload the pickled model, and check that the reloaded model gives
    the same predictions as the in-memory one.

    Files matching ``meth_test*`` are removed afterwards, even if the run or
    an assertion fails.
    """
    the_gp = GaussianProcess(
        kernel_name="2+3_mc",
        hyps=np.array([
            3.75996759e-06,
            1.53990678e-02,
            2.50624782e-05,
            5.07884426e-01,
            1.70172923e-03,
        ]),
        cutoffs=np.array([5, 3]),
        hyp_labels=["l2", "s2", "l3", "s3", "n0"],
        maxiter=1,
        opt_algorithm="L-BFGS-B",
    )

    # One JSON document per line in both fixture files
    with open(path.join(TEST_FILE_DIR, "methanol_frames.json"), "r") as f:
        frames = [Structure.from_dict(loads(line)) for line in f]

    # Only the first six environments are used as seeds
    with open(path.join(TEST_FILE_DIR, "methanol_envs.json"), "r") as f:
        data_dicts = [loads(line) for line in f.readlines()[:6]]

    envs = [AtomicEnvironment.from_dict(d) for d in data_dicts]
    forces = [np.array(d["forces"]) for d in data_dicts]
    seeds = list(zip(envs, forces))

    try:
        tt = TrajectoryTrainer(
            frames,
            gp=the_gp,
            shuffle_frames=True,
            rel_std_tolerance=0,
            abs_std_tolerance=0,
            skip=10,
            pre_train_seed_envs=seeds,
            pre_train_seed_frames=[frames[-1]],
            max_atoms_from_frame=4,
            output_name="meth_test",
            model_format="pickle",
            train_checkpoint_interval=1,
            pre_train_atoms_per_element={"H": 1},
        )
        tt.run()

        # NOTE(review): this pickle is presumably the model written by
        # tt.run() above, i.e. locally generated rather than untrusted input.
        with open("meth_test_model.pickle", "rb") as f:
            new_gp = pickle.load(f)

        test_env = envs[0]
        for d in [1, 2, 3]:
            assert np.all(
                the_gp.predict(x_t=test_env, d=d)
                == new_gp.predict(x_t=test_env, d=d))
    finally:
        # Always clean up so a failing run does not leave artifacts behind
        for leftover in glob("meth_test*"):
            remove(leftover)
def predict_on_structure(structure: Structure, gp: GaussianProcess,
                         n_cpus: int = None,
                         write_to_structure: bool = True,
                         selective_atoms: List[int] = None,
                         skipped_atom_value=0) \
        -> ('np.ndarray', 'np.ndarray'):
    """
    Predict the forces and std. dev. uncertainties for the atoms of a
    structure, optionally restricted to a subset of atoms and optionally
    written back onto the structure.

    :param structure: FLARE structure to obtain forces for, with N atoms
    :param gp: Gaussian Process model
    :param n_cpus: Accepted but not used by this serial implementation
    :param write_to_structure: Write results to structure's forces,
        std attributes
    :param selective_atoms: Only predict on these atoms; e.g.
        [0,1,2] will only predict and return for those atoms
    :param skipped_atom_value: Value used in the returned arrays for atoms
        that are skipped. Defaults to 0 but could be e.g. NaN. It is NOT
        written to the structure, even if write_to_structure is True.
    :return: N x 3 numpy array of forces, N x 3 numpy array of uncertainties
    :rtype: (np.ndarray, np.ndarray)
    """
    result_shape = (structure.nat, 3)
    forces = np.zeros(result_shape)
    stds = np.zeros(result_shape)

    if not selective_atoms:
        # No selection given: an empty list means "predict on all atoms"
        selective_atoms = []
    else:
        forces.fill(skipped_atom_value)
        stds.fill(skipped_atom_value)

    for atom_index in range(structure.nat):

        # With a selection active, leave unselected atoms at the
        # skipped value.
        if selective_atoms and atom_index not in selective_atoms:
            continue

        environment = AtomicEnvironment(structure, atom_index, gp.cutoffs,
                                        cutoffs_mask=gp.hyps_mask)

        for axis in range(3):
            mean, variance = gp.predict(environment, axis + 1)

            forces[atom_index][axis] = float(mean)
            stds[atom_index][axis] = float(np.sqrt(np.absolute(variance)))

            if not write_to_structure:
                continue

            structure.forces[atom_index][axis] = mean
            structure.stds[atom_index][axis] = np.sqrt(np.abs(variance))

    return forces, stds
def predict_on_atom_en(structure: Structure, atom: int, gp: GaussianProcess):
    """
    Predict the force components, their standard deviations, and the local
    energy of a single atom in a structure.

    :param structure: Structure containing the atom
    :param atom: Index of the atom to predict on
    :param gp: Gaussian Process model
    :return: list of 3 force components, list of 3 standard deviations,
        and the value returned by gp.predict_local_energy
    """
    environment = AtomicEnvironment(structure, atom, gp.cutoffs)

    force_components = []
    uncertainties = []

    # gp.predict takes the force component as 1, 2, 3
    for component in (1, 2, 3):
        mean, variance = gp.predict(environment, component)
        force_components.append(float(mean))
        # abs() guards the square root against a negative variance
        uncertainties.append(np.sqrt(np.abs(variance)))

    energy = gp.predict_local_energy(environment)

    return force_components, uncertainties, energy
def test_seed_and_run():
    """
    Train a GP through TrajectoryTrainer with seed environments and a seed
    frame, reload the pickled model, and check that the reloaded model gives
    the same predictions as the in-memory one.

    Output files are removed afterwards, even if the run or an assertion
    fails.
    """
    the_gp = GaussianProcess(kernel=two_plus_three_body_mc,
                             kernel_grad=two_plus_three_body_mc_grad,
                             hyps=np.array([
                                 3.75996759e-06, 1.53990678e-02,
                                 2.50624782e-05, 5.07884426e-01,
                                 1.70172923e-03
                             ]),
                             cutoffs=np.array([7, 7]),
                             hyp_labels=['l2', 's2', 'l3', 's3', 'n0'],
                             maxiter=1,
                             opt_algorithm='L-BFGS-B')

    # One JSON document per line in both fixture files
    with open('./test_files/methanol_frames.json', 'r') as f:
        frames = [Structure.from_dict(loads(line)) for line in f]

    # Only the first six environments are used as seeds
    with open('./test_files/methanol_envs.json', 'r') as f:
        data_dicts = [loads(line) for line in f.readlines()[:6]]

    envs = [AtomicEnvironment.from_dict(d) for d in data_dicts]
    forces = [np.array(d['forces']) for d in data_dicts]
    seeds = list(zip(envs, forces))

    try:
        tt = TrajectoryTrainer(frames,
                               gp=the_gp,
                               shuffle_frames=True,
                               rel_std_tolerance=0,
                               abs_std_tolerance=0,
                               skip=15,
                               pre_train_seed_envs=seeds,
                               pre_train_seed_frames=[frames[-1]],
                               max_atoms_from_frame=4,
                               model_write='meth_test.pickle',
                               model_format='pickle',
                               checkpoint_interval=1,
                               pre_train_atoms_per_element={'H': 1})
        tt.run()

        # NOTE(review): this pickle is presumably the model written by
        # tt.run() above, i.e. locally generated rather than untrusted input.
        with open('meth_test.pickle', 'rb') as f:
            new_gp = pickle.load(f)

        test_env = envs[0]

        # Force components are labelled 1, 2, 3 (the prediction helpers call
        # gp.predict(chemenv, i + 1) for i in range(3)), so 0 is not a
        # valid component to compare.
        for d in [1, 2, 3]:
            assert np.all(
                the_gp.predict(x_t=test_env, d=d)
                == new_gp.predict(x_t=test_env, d=d))
    finally:
        # Remove the outputs without shelling out to `rm`; files that were
        # never created are skipped.
        for artifact in ('./gp_from_aimd.out',
                         './gp_from_aimd.xyz',
                         './gp_from_aimd-f.xyz',
                         './meth_test.pickle'):
            if os.path.exists(artifact):
                os.remove(artifact)
def predict_on_structure_en(structure: Structure, gp: GaussianProcess,
                            no_cpus=None):
    """
    Predict forces, standard deviations, and local energies for every atom
    of a structure.

    Forces and standard deviations are written into ``structure.forces`` and
    ``structure.stds`` in place and returned as numpy arrays; the local
    energies are returned as a plain list.

    :param structure: Structure whose atoms are predicted on
    :param gp: Gaussian Process model
    :param no_cpus: Accepted but not used by this implementation
    :return: array of forces, array of standard deviations, list of local
        energies (one entry per atom)
    """
    energies = [0] * structure.nat

    for atom_index in range(structure.nat):
        environment = AtomicEnvironment(structure, atom_index, gp.cutoffs)

        # gp.predict takes the force component as 1, 2, 3
        for axis, component in enumerate((1, 2, 3)):
            mean, variance = gp.predict(environment, component)
            structure.forces[atom_index][axis] = float(mean)
            # abs() guards the square root against a negative variance
            structure.stds[atom_index][axis] = np.sqrt(np.abs(variance))

        energies[atom_index] = gp.predict_local_energy(environment)

    return np.array(structure.forces), np.array(structure.stds), energies
def predict_on_atom(structure: Structure, atom: int, gp: GaussianProcess):
    """
    Return the forces/std. dev. uncertainty associated with a single atom in
    a structure.

    :param structure: Structure containing the atom
    :param atom: Index of the atom to predict on
    :param gp: Gaussian Process model
    :return: array of 3 force components, array of 3 standard deviations
    """
    environment = AtomicEnvironment(structure, atom, gp.cutoffs)

    force_components = []
    uncertainties = []

    # gp.predict takes the force component as 1, 2, 3
    for component in (1, 2, 3):
        mean, variance = gp.predict(environment, component)
        force_components.append(float(mean))
        # abs() guards the square root against a negative variance
        uncertainties.append(np.sqrt(np.abs(variance)))

    return np.array(force_components), np.array(uncertainties)
def test_to_from_gp(): """ To/from methods for creating new RBCMs and turning them back into GPs :return: """ gp = GaussianProcess() for frame in methanol_frames: gp.update_db(frame, forces=frame.forces) rbcm = RobustBayesianCommitteeMachine.from_gp(gp) new_gp = rbcm.get_full_gp() test_env = methanol_envs[0] for d in range(1, 4): assert np.array_equal(gp.predict(test_env, d), new_gp.predict(test_env, d))
def test_prediction(): """ Test that prediction functions works. The RBCM in the 1-expert case *does not* reduce to a GP's predictions, because the way the mean and variance is computed for each expert is weighted based on the expert's performance on the entire dataset in a way that does not yield 1 in the absence of other experts. Hence, perform the relevant transformations on a GP's prediction and check it against the RBCM's. :return: """ prior_var = 0.1 rbcm = RobustBayesianCommitteeMachine( ndata_per_expert=100, prior_variance=prior_var, ) gp = GaussianProcess() envs = methanol_envs[:10] for env in envs: rbcm.add_one_env(env, env.force) gp.add_one_env(env, env.force, train=False) struc = methanol_frames[-1] gp.update_db(struc, forces=struc.forces) rbcm.update_db(struc, forces=struc.forces) test_env = methanol_envs[-1] for d in [1, 2, 3]: assert np.array_equal(gp.hyps, rbcm.hyps) rbcm_pred = rbcm.predict(test_env, d) gp_pred = gp.predict(test_env, d) gp_kv = get_kernel_vector( gp.name, gp.kernel, gp.energy_force_kernel, test_env, d, gp.hyps, cutoffs=gp.cutoffs, hyps_mask=gp.hyps_mask, n_cpus=1, n_sample=gp.n_sample, ) gp_mean = np.matmul(gp_kv, gp.alpha) assert gp_mean == gp_pred[0] gp_self_kern = gp.kernel( env1=test_env, env2=test_env, d1=d, d2=d, hyps=gp.hyps, cutoffs=np.array((7, 3.5)), ) gp_var_i = gp_self_kern - np.matmul(np.matmul(gp_kv.T, gp.ky_mat_inv), gp_kv) gp_beta = 0.5 * (np.log(prior_var) - np.log(gp_var_i)) mean = gp_mean * gp_beta / gp_var_i var = gp_beta / gp_var_i + (1 - gp_beta) / prior_var pred_var = 1.0 / var pred_mean = pred_var * mean assert pred_mean == rbcm_pred[0] assert pred_var == rbcm_pred[1]
test_structure, forces = \ get_random_structure(np.eye(3), [1, 2], 3) energy = 3.14 gp_model.update_db(test_structure, forces, energy=energy) yield gp_model del gp_model _fake_gp = GaussianProcess(kernel_name='2+3', cutoffs=[5., 5.], hyps=[1., 1., 1., 1., 1.]) _fake_structure = Structure(cell=np.eye(3), species=[1, 1, 1], positions=np.random.uniform(0, 1, size=(3, 3))) _fake_gp.predict = fake_predict _fake_gp.predict_local_energy = fake_predict_local_energy assert isinstance(_fake_gp.predict(1, 1), tuple) assert isinstance(_fake_gp.predict_local_energy(1), float) @pytest.mark.parametrize('n_cpu', [None, 1, 2]) def test_predict_on_structure_par(n_cpu): # Predict only on the first atom, and make rest NAN selective_atoms = [0] skipped_atom_value = np.nan forces, stds = predict_on_structure_par( _fake_structure, _fake_gp, n_cpus=n_cpu, write_to_structure=False,
from flare.predict import predict_on_structure, predict_on_structure_par import pytest def fake_predict(x, d): return np.random.uniform(-1, 1), np.random.uniform(-1, 1) _fake_gp = GaussianProcess(kernel_name='2_sc', cutoffs=[5], hyps=[1, 1, 1]) _fake_structure = Structure(cell=np.eye(3), species=[1, 1, 1], positions=np.random.uniform(0, 1, size=(3, 3))) _fake_gp.predict = fake_predict #lambda _, __: ( #np.random.uniform(-1, 1), np.random.uniform(-1, 1)) print(_fake_gp.predict(1, 2)) @pytest.mark.parametrize('n_cpu', [1, 2]) def test_predict_on_structure_par(n_cpu): # Predict only on the first atom, and make rest NAN selective_atoms = [0] skipped_atom_value = np.nan forces, stds = predict_on_structure_par(