def test_load_one_frame_and_run(): the_gp = GaussianProcess(kernel=two_plus_three_body_mc, kernel_grad=two_plus_three_body_mc_grad, hyps=np.array([ 3.75996759e-06, 1.53990678e-02, 2.50624782e-05, 5.07884426e-01, 1.70172923e-03 ]), cutoffs=np.array([7, 7]), hyp_labels=['l2', 's2', 'l3', 's3', 'n0'], maxiter=1, opt_algorithm='L-BFGS-B') with open('./test_files/methanol_frames.json', 'r') as f: frames = [Structure.from_dict(loads(s)) for s in f.readlines()] tt = TrajectoryTrainer(frames, gp=the_gp, shuffle_frames=True, rel_std_tolerance=0, abs_std_tolerance=0, skip=15) tt.run() os.system('rm ./gp_from_aimd.gp') os.system('rm ./gp_from_aimd.out') os.system('rm ./gp_from_aimd.xyz') os.system('rm ./gp_from_aimd-f.xyz')
def test_load_one_frame_and_run(): the_gp = GaussianProcess( kernel_name="2+3_mc", hyps=np.array([ 3.75996759e-06, 1.53990678e-02, 2.50624782e-05, 5.07884426e-01, 1.70172923e-03, ]), cutoffs=np.array([5, 3]), hyp_labels=["l2", "s2", "l3", "s3", "n0"], maxiter=1, opt_algorithm="L-BFGS-B", ) with open(path.join(TEST_FILE_DIR, "methanol_frames.json"), "r") as f: frames = [Structure.from_dict(loads(s)) for s in f.readlines()] tt = TrajectoryTrainer( frames, gp=the_gp, shuffle_frames=True, print_as_xyz=True, rel_std_tolerance=0, abs_std_tolerance=0, skip=15, ) tt.run() for f in glob(f"gp_from_aimd*"): remove(f)
def test_pred_on_elements(): the_gp = GaussianProcess(kernel_name="2+3_mc", hyps=np.array([ 3.75996759e-06, 1.53990678e-02, 2.50624782e-05, 5.07884426e-01, 1.70172923e-03 ]), cutoffs=np.array([7, 3]), hyp_labels=['l2', 's2', 'l3', 's3', 'n0'], maxiter=1, opt_algorithm='L-BFGS-B') with open('./test_files/methanol_frames.json', 'r') as f: frames = [Structure.from_dict(loads(s)) for s in f.readlines()] with open('./test_files/methanol_envs.json', 'r') as f: data_dicts = [loads(s) for s in f.readlines()[:6]] envs = [AtomicEnvironment.from_dict(d) for d in data_dicts] forces = [np.array(d['forces']) for d in data_dicts] seeds = list(zip(envs, forces)) all_frames = deepcopy(frames) tt = TrajectoryTrainer(frames, gp=the_gp, shuffle_frames=False, rel_std_tolerance=0, abs_std_tolerance=0, abs_force_tolerance=.001, skip=5, min_atoms_per_train=100, pre_train_seed_envs=seeds, pre_train_seed_frames=[frames[-1]], max_atoms_from_frame=4, output_name='meth_test', model_format='json', atom_checkpoint_interval=50, pre_train_atoms_per_element={'H': 1}, predict_atoms_per_element={ 'H': 0, 'C': 1, 'O': 0 }) # Set to predict only on Carbon after training on H to ensure errors are # high and that they get added to the gp tt.run() # Ensure forces weren't written directly to structure for i in range(len(all_frames)): assert np.array_equal(all_frames[i].forces, frames[i].forces) # Assert that Carbon atoms were correctly added assert the_gp.training_statistics['envs_by_species']['C'] > 2 for f in glob(f"meth_test*"): remove(f) for f in glob(f"gp_from_aimd*"): remove(f)
def test_mgp_gpfa(all_mgp, all_gp): """ Ensure that passing in an MGP also works for the trajectory trainer :param all_mgp: :param all_gp: :return: """ np.random.seed(10) gp_model = get_gp("3", "mc", False) gp_model.set_L_alpha() grid_num_3 = 3 lower_cut = 0.01 grid_params_3b = { "lower_bound": [lower_cut] * 3, "grid_num": [grid_num_3] * 3, "svd_rank": "auto", } grid_params = {"load_grid": None, "update": False} grid_params["threebody"] = grid_params_3b unique_species = gp_model.training_statistics["species"] mgp_model = MappedGaussianProcess(grid_params=grid_params, unique_species=unique_species, n_cpus=1) mgp_model.build_map(gp_model) nenv = 10 cell = np.eye(3) struc, f = get_random_structure(cell, unique_species, nenv) struc.forces = np.array(f) frames = [struc] tt = TrajectoryTrainer( frames, mgp_model, rel_std_tolerance=0, abs_std_tolerance=0, abs_force_tolerance=1e-8, print_training_plan=True, ) assert tt.gp_is_mapped is True tt.run() # Test that training plan is properly written with open("gp_from_aimd_training_plan.json", "r") as f: plan = json.loads(f.readline()) assert isinstance(plan["0"], list) assert len(plan["0"]) == len(struc) assert [p[0] for p in plan["0"]] == list(range(len(struc))) for f in glob(f"gp_from_aimd*"): remove(f)
def test_instantiation_of_trajectory_trainer(fake_gp): a = TrajectoryTrainer(frames=[], gp=fake_gp) assert isinstance(a, TrajectoryTrainer) _ = TrajectoryTrainer([], fake_gp, parallel=True, calculate_energy=True) _ = TrajectoryTrainer([], fake_gp, parallel=True, calculate_energy=False) _ = TrajectoryTrainer([], fake_gp, parallel=False, calculate_energy=True) _ = TrajectoryTrainer([], fake_gp, parallel=False, calculate_energy=False)
def test_seed_and_run(): the_gp = GaussianProcess( kernel_name="2+3_mc", hyps=np.array([ 3.75996759e-06, 1.53990678e-02, 2.50624782e-05, 5.07884426e-01, 1.70172923e-03, ]), cutoffs=np.array([5, 3]), hyp_labels=["l2", "s2", "l3", "s3", "n0"], maxiter=1, opt_algorithm="L-BFGS-B", ) with open(path.join(TEST_FILE_DIR, "methanol_frames.json"), "r") as f: frames = [Structure.from_dict(loads(s)) for s in f.readlines()] with open(path.join(TEST_FILE_DIR, "methanol_envs.json"), "r") as f: data_dicts = [loads(s) for s in f.readlines()[:6]] envs = [AtomicEnvironment.from_dict(d) for d in data_dicts] forces = [np.array(d["forces"]) for d in data_dicts] seeds = list(zip(envs, forces)) tt = TrajectoryTrainer( frames, gp=the_gp, shuffle_frames=True, rel_std_tolerance=0, abs_std_tolerance=0, skip=10, pre_train_seed_envs=seeds, pre_train_seed_frames=[frames[-1]], max_atoms_from_frame=4, output_name="meth_test", model_format="pickle", train_checkpoint_interval=1, pre_train_atoms_per_element={"H": 1}, ) tt.run() with open("meth_test_model.pickle", "rb") as f: new_gp = pickle.load(f) test_env = envs[0] for d in [1, 2, 3]: assert np.all( the_gp.predict(x_t=test_env, d=d) == new_gp.predict(x_t=test_env, d=d)) for f in glob(f"meth_test*"): remove(f)
def test_load_trained_gp_and_run(methanol_gp): with open('./test_files/methanol_frames.json', 'r') as f: frames = [Structure.from_dict(loads(s)) for s in f.readlines()] tt = TrajectoryTrainer(frames, gp=methanol_gp, rel_std_tolerance=0, abs_std_tolerance=0, skip=15) tt.run() os.system('rm ./gp_from_aimd*')
def test_seed_and_run(): the_gp = GaussianProcess(kernel=two_plus_three_body_mc, kernel_grad=two_plus_three_body_mc_grad, hyps=np.array([ 3.75996759e-06, 1.53990678e-02, 2.50624782e-05, 5.07884426e-01, 1.70172923e-03 ]), cutoffs=np.array([7, 7]), hyp_labels=['l2', 's2', 'l3', 's3', 'n0'], maxiter=1, opt_algorithm='L-BFGS-B') with open('./test_files/methanol_frames.json', 'r') as f: frames = [Structure.from_dict(loads(s)) for s in f.readlines()] with open('./test_files/methanol_envs.json', 'r') as f: data_dicts = [loads(s) for s in f.readlines()[:6]] envs = [AtomicEnvironment.from_dict(d) for d in data_dicts] forces = [np.array(d['forces']) for d in data_dicts] seeds = list(zip(envs, forces)) tt = TrajectoryTrainer(frames, gp=the_gp, shuffle_frames=True, rel_std_tolerance=0, abs_std_tolerance=0, skip=15, pre_train_seed_envs=seeds, pre_train_seed_frames=[frames[-1]], max_atoms_from_frame=4, model_write='meth_test.pickle', model_format='pickle', checkpoint_interval=1, pre_train_atoms_per_element={'H': 1}) tt.run() with open('meth_test.pickle', 'rb') as f: new_gp = pickle.load(f) test_env = envs[0] for d in [0, 1, 2]: assert np.all( the_gp.predict(x_t=test_env, d=d) == new_gp.predict(x_t=test_env, d=d)) os.system('rm ./gp_from_aimd.out') os.system('rm ./gp_from_aimd.xyz') os.system('rm ./gp_from_aimd-f.xyz') os.system('rm ./meth_test.pickle')
def test_load_trained_gp_and_run(methanol_gp): with open('./test_files/methanol_frames.json', 'r') as f: frames = [Structure.from_dict(loads(s)) for s in f.readlines()] tt = TrajectoryTrainer(frames, gp=methanol_gp, rel_std_tolerance=0, abs_std_tolerance=0, skip=15, train_checkpoint_interval=10) tt.run() for f in glob(f"gp_from_aimd*"): remove(f)
def test_load_trained_gp_and_run(methanol_gp): with open(path.join(TEST_FILE_DIR, "methanol_frames.json"), "r") as f: frames = [Structure.from_dict(loads(s)) for s in f.readlines()] tt = TrajectoryTrainer( frames, gp=methanol_gp, rel_std_tolerance=0, abs_std_tolerance=0, skip=15, train_checkpoint_interval=10, ) tt.run() for f in glob(f"gp_from_aimd*"): remove(f)
def test_load_one_frame_and_run(): the_gp = GaussianProcess(kernel_name="2+3_mc", hyps=np.array([ 3.75996759e-06, 1.53990678e-02, 2.50624782e-05, 5.07884426e-01, 1.70172923e-03 ]), cutoffs=np.array([7, 7]), hyp_labels=['l2', 's2', 'l3', 's3', 'n0'], maxiter=1, opt_algorithm='L-BFGS-B') with open('./test_files/methanol_frames.json', 'r') as f: frames = [Structure.from_dict(loads(s)) for s in f.readlines()] tt = TrajectoryTrainer(frames, gp=the_gp, shuffle_frames=True, rel_std_tolerance=0, abs_std_tolerance=0, skip=15) tt.run() for f in glob(f"gp_from_aimd*"): remove(f)
def test_mgp_gpfa(all_mgp, all_gp): ''' Ensure that passing in an MGP also works for the trajectory trainer :param all_mgp: :param all_gp: :return: ''' gp_model = get_gp('3', 'mc', False) gp_model.set_L_alpha() grid_num_2 = 5 grid_num_3 = 3 lower_cut = 0.01 two_cut = gp_model.cutoffs[0] three_cut = gp_model.cutoffs[1] # set struc params. cell and masses arbitrary? mapped_cell = np.eye(3) * 2 struc_params = { 'species': [1, 2], 'cube_lat': mapped_cell, 'mass_dict': { '0': 27, '1': 16 } } # grid parameters train_size = len(gp_model.training_data) grid_params = { 'bodies': [2], 'cutoffs': gp_model.cutoffs, 'bounds_2': [[lower_cut], [two_cut]], 'bounds_3': [[lower_cut, lower_cut, lower_cut], [three_cut, three_cut, three_cut]], 'grid_num_2': grid_num_2, 'grid_num_3': [grid_num_3, grid_num_3, grid_num_3], 'svd_rank_2': np.min((grid_num_2, 3 * train_size)), 'svd_rank_3': np.min((grid_num_3**3, 3 * train_size)), 'load_grid': None, 'update': False } struc_params = { 'species': [1, 2], 'cube_lat': np.eye(3) * 2, 'mass_dict': { '0': 27, '1': 16 } } mgp_model = MappedGaussianProcess(grid_params, struc_params) mgp_model.build_map(gp_model) nenv = 10 cell = np.eye(3) unique_species = gp_model.training_data[0].species struc, f = get_random_structure(cell, unique_species, nenv) struc.forces = np.array(f) frames = [struc] tt = TrajectoryTrainer(frames, mgp_model, rel_std_tolerance=0, abs_std_tolerance=0, abs_force_tolerance=0) assert tt.mgp is True tt.run()
def test_active_learning_simple_run(): """ Test simple mechanics of active learning method. :return: """ the_gp = GaussianProcess( kernel_name="2+3_mc", hyps=np.array([ 3.75996759e-06, 1.53990678e-02, 2.50624782e-05, 5.07884426e-01, 1.70172923e-03, ]), cutoffs=np.array([5, 3]), hyp_labels=["l2", "s2", "l3", "s3", "n0"], maxiter=1, opt_algorithm="L-BFGS-B", ) frames = Structure.from_file( path.join(TEST_FILE_DIR, "methanol_frames.json")) # Assign fake energies to structures for frame in frames: frame.energy = np.random.random() tt = TrajectoryTrainer(gp=the_gp, include_energies=True) tt.run_passive_learning( frames=frames[:1], max_elts_per_frame={ "C": 1, "O": 1, "H": 1 }, post_training_iterations=0, post_build_matrices=True, ) assert len(the_gp.training_structures) == 1 prev_gp_len = len(the_gp) prev_gp_stats = the_gp.training_statistics tt.run_active_learning(frames[:2], rel_std_tolerance=0, abs_std_tolerance=0, abs_force_tolerance=0) assert len(the_gp) == prev_gp_len # Try on a frame where the Carbon atom is guaranteed to trip the # abs. force tolerance condition. # Turn off include energies so that the number of training structures # does not change. tt.include_energies = False tt.run_active_learning( frames[1:2], rel_std_tolerance=0, abs_std_tolerance=0, abs_force_tolerance=0.1, max_elts_per_frame={ "H": 0, "O": 0 }, max_model_elts={"C": 2}, ) assert len(the_gp) == prev_gp_len + 1 assert len(the_gp.training_structures) == 1 prev_carbon_atoms = prev_gp_stats["envs_by_species"]["C"] assert the_gp.training_statistics["envs_by_species"][ "C"] == prev_carbon_atoms + 1 prev_gp_len = len(the_gp) tt.run_active_learning( frames[3:4], rel_std_tolerance=0, abs_std_tolerance=0, abs_force_tolerance=0.1, max_model_size=prev_gp_len, ) assert len(the_gp) == prev_gp_len # Test that model doesn't add atoms prev_gp_len = len(the_gp) tt.run_active_learning( frames[5:6], rel_std_tolerance=0, abs_std_tolerance=0, abs_force_tolerance=0.1, max_model_elts={ "C": 2, "H": 1, "O": 1 }, ) assert len(the_gp) == prev_gp_len for f in glob(f"gp_from_aimd*"): remove(f)
def test_passive_learning(): the_gp = GaussianProcess( kernel_name="2+3_mc", hyps=np.array([ 3.75996759e-06, 1.53990678e-02, 2.50624782e-05, 5.07884426e-01, 1.70172923e-03, ]), cutoffs=np.array([5, 3]), hyp_labels=["l2", "s2", "l3", "s3", "n0"], maxiter=1, opt_algorithm="L-BFGS-B", ) frames = Structure.from_file( path.join(TEST_FILE_DIR, "methanol_frames.json")) envs = AtomicEnvironment.from_file( path.join(TEST_FILE_DIR, "methanol_envs.json")) cur_gp = deepcopy(the_gp) tt = TrajectoryTrainer(frames=None, gp=cur_gp) # TEST ENVIRONMENT ADDITION envs_species = set(Z_to_element(env.ctype) for env in envs) tt.run_passive_learning(environments=envs, post_build_matrices=False) assert cur_gp.training_statistics["N"] == len(envs) assert set(cur_gp.training_statistics["species"]) == envs_species # TEST FRAME ADDITION: ALL ARE ADDED cur_gp = deepcopy(the_gp) tt.gp = cur_gp tt.run_passive_learning(frames=frames, post_build_matrices=False) assert len(cur_gp.training_data) == sum([len(fr) for fr in frames]) # TEST FRAME ADDITION: MAX OUT MODEL SIZE AT 1 cur_gp = deepcopy(the_gp) tt.gp = cur_gp tt.run_passive_learning(frames=frames, max_model_size=1, post_training_iterations=1) assert len(cur_gp.training_data) == 1 # TEST FRAME ADDITION: EXCLUDE OXYGEN, LIMIT CARBON TO 1, 1 H PER FRAME cur_gp = deepcopy(the_gp) tt.gp = cur_gp tt.run_passive_learning( frames=frames, max_model_elts={ "O": 0, "C": 1, "H": 5 }, max_elts_per_frame={"H": 1}, post_build_matrices=False, ) assert "O" not in cur_gp.training_statistics["species"] assert cur_gp.training_statistics["envs_by_species"]["C"] == 1 assert cur_gp.training_statistics["envs_by_species"]["H"] == 5
def test_pred_on_elements(): the_gp = GaussianProcess( kernel_name="2+3_mc", hyps=np.array([ 3.75996759e-06, 1.53990678e-02, 2.50624782e-05, 5.07884426e-01, 1.70172923e-03, ]), cutoffs=np.array([5, 3]), hyp_labels=["l2", "s2", "l3", "s3", "n0"], maxiter=1, opt_algorithm="L-BFGS-B", ) with open(path.join(TEST_FILE_DIR, "methanol_frames.json"), "r") as f: frames = [Structure.from_dict(loads(s)) for s in f.readlines()] with open(path.join(TEST_FILE_DIR, "methanol_envs.json"), "r") as f: data_dicts = [loads(s) for s in f.readlines()[:6]] envs = [AtomicEnvironment.from_dict(d) for d in data_dicts] forces = [np.array(d["forces"]) for d in data_dicts] seeds = list(zip(envs, forces)) all_frames = deepcopy(frames) tt = TrajectoryTrainer( frames, gp=the_gp, shuffle_frames=False, rel_std_tolerance=0, abs_std_tolerance=0, abs_force_tolerance=0.001, skip=5, min_atoms_per_train=100, pre_train_seed_envs=seeds, pre_train_seed_frames=[frames[-1]], max_atoms_from_frame=4, output_name="meth_test", print_as_xyz=True, model_format="json", atom_checkpoint_interval=50, pre_train_atoms_per_element={"H": 1}, predict_atoms_per_element={ "H": 0, "C": 1, "O": 0 }, ) # Set to predict only on Carbon after training on H to ensure errors are # high and that they get added to the gp tt.run() # Ensure forces weren't written directly to structure for i in range(len(all_frames)): assert np.array_equal(all_frames[i].forces, frames[i].forces) # Assert that Carbon atoms were correctly added assert the_gp.training_statistics["envs_by_species"]["C"] > 2 for f in glob(f"meth_test*"): remove(f) for f in glob(f"gp_from_aimd*"): remove(f)
def test_uncertainty_threshold(fake_gp): tt = TrajectoryTrainer([], fake_gp, rel_std_tolerance=.5, abs_std_tolerance=.01) fake_structure = Structure(cell=np.eye(3), species=["H"], positions=np.array([[0, 0, 0]])) # Test a structure with no variance passes fake_structure.stds = np.array([[0, 0, 0]]) res1, res2 = tt.is_std_in_bound(fake_structure) assert res1 is True assert res2 == [-1] # Test that the absolute criteria trips the threshold fake_structure.stds = np.array([[.02, 0, 0]]) res1, res2 = tt.is_std_in_bound(fake_structure) assert res1 is False assert res2 == [0] tt.abs_std_tolerance = 100 # Test that the relative criteria trips the threshold fake_structure.stds = np.array([[.6, 0, 0]]) res1, res2 = tt.is_std_in_bound(fake_structure) assert res1 is False assert res2 == [0] # Test that 'test mode' works, where no GP modification occurs tt.abs_std_tolerance = 0 tt.rel_std_tolerance = 0 res1, res2 = tt.is_std_in_bound(fake_structure) assert res1 is True assert res2 == [-1] # Test permutations of one / another being off tt.abs_std_tolerance = 1 tt.rel_std_tolerance = 0 res1, res2 = tt.is_std_in_bound(fake_structure) assert res1 is True assert res2 == [-1] tt.abs_std_tolerance = 0 tt.rel_std_tolerance = 1 res1, res2 = tt.is_std_in_bound(fake_structure) assert res1 is True assert res2 == [-1]