def test_active_learning_simple_run():
    """
    Test simple mechanics of active learning method.

    A GP using the ``2+3_mc`` kernel is first trained on one frame through
    ``run_passive_learning`` and is then passed through several
    ``run_active_learning`` calls with different tolerances and size limits.
    After each call the size of the GP (and, where relevant, the number of
    training structures and carbon environments) is compared against the
    expected value.

    Files matching ``gp_from_aimd*`` in the working directory are deleted when
    the test finishes, whether it passed or failed.

    :return: None
    """
    the_gp = GaussianProcess(
        kernel_name="2+3_mc",
        hyps=np.array(
            [
                3.75996759e-06,
                1.53990678e-02,
                2.50624782e-05,
                5.07884426e-01,
                1.70172923e-03,
            ]
        ),
        cutoffs=np.array([5, 3]),
        hyp_labels=["l2", "s2", "l3", "s3", "n0"],
        maxiter=1,
        opt_algorithm="L-BFGS-B",
    )

    frames = Structure.from_file(path.join(TEST_FILE_DIR, "methanol_frames.json"))

    # Assign fake energies to structures
    for frame in frames:
        frame.energy = np.random.random()

    # Everything that may write output files lives inside the try block so
    # that the cleanup in ``finally`` also runs when an assertion fails.
    try:
        tt = TrajectoryTrainer(gp=the_gp, include_energies=True)

        # Seed the model with one frame, at most one atom per element.
        tt.run_passive_learning(
            frames=frames[:1],
            max_elts_per_frame={"C": 1, "O": 1, "H": 1},
            post_training_iterations=0,
            post_build_matrices=True,
        )

        assert len(the_gp.training_structures) == 1
        prev_gp_len = len(the_gp)
        prev_gp_stats = the_gp.training_statistics

        # With every tolerance set to zero the GP is expected to keep its size.
        tt.run_active_learning(
            frames[:2], rel_std_tolerance=0, abs_std_tolerance=0, abs_force_tolerance=0
        )
        assert len(the_gp) == prev_gp_len

        # Try on a frame where the Carbon atom is guaranteed to trip the
        # abs. force tolerance condition.
        # Turn off include energies so that the number of training structures
        # does not change.
        tt.include_energies = False
        tt.run_active_learning(
            frames[1:2],
            rel_std_tolerance=0,
            abs_std_tolerance=0,
            abs_force_tolerance=0.1,
            max_elts_per_frame={"H": 0, "O": 0},
            max_model_elts={"C": 2},
        )
        assert len(the_gp) == prev_gp_len + 1
        assert len(the_gp.training_structures) == 1
        prev_carbon_atoms = prev_gp_stats["envs_by_species"]["C"]
        assert (
            the_gp.training_statistics["envs_by_species"]["C"] == prev_carbon_atoms + 1
        )

        # Cap the model at its current size: the GP is expected not to grow.
        prev_gp_len = len(the_gp)
        tt.run_active_learning(
            frames[3:4],
            rel_std_tolerance=0,
            abs_std_tolerance=0,
            abs_force_tolerance=0.1,
            max_model_size=prev_gp_len,
        )
        assert len(the_gp) == prev_gp_len

        # Test that model doesn't add atoms
        prev_gp_len = len(the_gp)
        tt.run_active_learning(
            frames[5:6],
            rel_std_tolerance=0,
            abs_std_tolerance=0,
            abs_force_tolerance=0.1,
            max_model_elts={"C": 2, "H": 1, "O": 1},
        )
        assert len(the_gp) == prev_gp_len
    finally:
        # Remove the output files left behind by the trainer.
        for leftover in glob("gp_from_aimd*"):
            remove(leftover)
def test_passive_learning():
    """
    Check ``run_passive_learning`` when fed environments and when fed frames.

    Four scenarios are run, each on a fresh deep copy of the same template GP:
    adding atomic environments, adding every atom of every frame, capping the
    model size at 1, and restricting which elements may enter the model.

    :return: None
    """
    hyperparameters = np.array(
        [
            3.75996759e-06,
            1.53990678e-02,
            2.50624782e-05,
            5.07884426e-01,
            1.70172923e-03,
        ]
    )
    template_gp = GaussianProcess(
        kernel_name="2+3_mc",
        hyps=hyperparameters,
        cutoffs=np.array([5, 3]),
        hyp_labels=["l2", "s2", "l3", "s3", "n0"],
        maxiter=1,
        opt_algorithm="L-BFGS-B",
    )

    frames_file = path.join(TEST_FILE_DIR, "methanol_frames.json")
    frames = Structure.from_file(frames_file)
    envs_file = path.join(TEST_FILE_DIR, "methanol_envs.json")
    envs = AtomicEnvironment.from_file(envs_file)

    model = deepcopy(template_gp)
    trainer = TrajectoryTrainer(frames=None, gp=model)

    def swap_in_fresh_model():
        # Hand the trainer an untouched copy of the template GP and return it.
        replacement = deepcopy(template_gp)
        trainer.gp = replacement
        return replacement

    # TEST ENVIRONMENT ADDITION
    expected_species = {Z_to_element(env.ctype) for env in envs}
    trainer.run_passive_learning(environments=envs, post_build_matrices=False)

    assert model.training_statistics["N"] == len(envs)
    assert set(model.training_statistics["species"]) == expected_species

    # TEST FRAME ADDITION: ALL ARE ADDED
    model = swap_in_fresh_model()
    trainer.run_passive_learning(frames=frames, post_build_matrices=False)

    assert len(model.training_data) == sum(map(len, frames))

    # TEST FRAME ADDITION: MAX OUT MODEL SIZE AT 1
    model = swap_in_fresh_model()
    trainer.run_passive_learning(
        frames=frames, max_model_size=1, post_training_iterations=1
    )

    assert len(model.training_data) == 1

    # TEST FRAME ADDITION: EXCLUDE OXYGEN, LIMIT CARBON TO 1, 1 H PER FRAME
    model = swap_in_fresh_model()
    element_caps = {"O": 0, "C": 1, "H": 5}
    trainer.run_passive_learning(
        frames=frames,
        max_model_elts=element_caps,
        max_elts_per_frame={"H": 1},
        post_build_matrices=False,
    )

    assert "O" not in model.training_statistics["species"]
    assert model.training_statistics["envs_by_species"]["C"] == 1
    assert model.training_statistics["envs_by_species"]["H"] == 5