예제 #1
0
def test_active_learning_simple_run():
    """
    Test simple mechanics of active learning method.

    A GP is first trained passively on the first methanol frame (at most one
    C, O and H environment), then ``run_active_learning`` is called several
    times with different tolerances and size caps. After each call the size
    of the GP, and where relevant the number of carbon environments, is
    checked.

    Files matching ``gp_from_aimd*`` in the working directory are removed at
    the end of the test, whether it passes or fails.

    :return: None
    """

    the_gp = GaussianProcess(
        kernel_name="2+3_mc",
        hyps=np.array([
            3.75996759e-06,
            1.53990678e-02,
            2.50624782e-05,
            5.07884426e-01,
            1.70172923e-03,
        ]),
        cutoffs=np.array([5, 3]),
        hyp_labels=["l2", "s2", "l3", "s3", "n0"],
        maxiter=1,
        opt_algorithm="L-BFGS-B",
    )

    frames = Structure.from_file(
        path.join(TEST_FILE_DIR, "methanol_frames.json"))

    # Assign fake energies to structures
    # NOTE(review): the values are unseeded random numbers; the assertions
    # below only check counts, but confirm they cannot depend on the draw.
    for frame in frames:
        frame.energy = np.random.random()

    # Everything that may produce output files runs inside the try block so
    # that the cleanup in ``finally`` also happens when an assertion fails.
    try:
        tt = TrajectoryTrainer(gp=the_gp, include_energies=True)

        tt.run_passive_learning(
            frames=frames[:1],
            max_elts_per_frame={
                "C": 1,
                "O": 1,
                "H": 1
            },
            post_training_iterations=0,
            post_build_matrices=True,
        )

        assert len(the_gp.training_structures) == 1
        prev_gp_len = len(the_gp)
        prev_gp_stats = the_gp.training_statistics

        # With every tolerance set to 0 the model size must not change.
        tt.run_active_learning(frames[:2],
                               rel_std_tolerance=0,
                               abs_std_tolerance=0,
                               abs_force_tolerance=0)
        assert len(the_gp) == prev_gp_len

        # Try on a frame where the Carbon atom is guaranteed to trip the
        # abs. force tolerance condition.
        # Turn off include energies so that the number of training
        # structures does not change.
        tt.include_energies = False
        tt.run_active_learning(
            frames[1:2],
            rel_std_tolerance=0,
            abs_std_tolerance=0,
            abs_force_tolerance=0.1,
            max_elts_per_frame={
                "H": 0,
                "O": 0
            },
            max_model_elts={"C": 2},
        )
        # Exactly one environment, a carbon one, must have been added.
        assert len(the_gp) == prev_gp_len + 1
        assert len(the_gp.training_structures) == 1
        prev_carbon_atoms = prev_gp_stats["envs_by_species"]["C"]
        assert the_gp.training_statistics["envs_by_species"][
            "C"] == prev_carbon_atoms + 1

        # Cap the model at its current size: nothing may be added.
        prev_gp_len = len(the_gp)
        tt.run_active_learning(
            frames[3:4],
            rel_std_tolerance=0,
            abs_std_tolerance=0,
            abs_force_tolerance=0.1,
            max_model_size=prev_gp_len,
        )
        assert len(the_gp) == prev_gp_len

        # Test that model doesn't add atoms
        prev_gp_len = len(the_gp)
        tt.run_active_learning(
            frames[5:6],
            rel_std_tolerance=0,
            abs_std_tolerance=0,
            abs_force_tolerance=0.1,
            max_model_elts={
                "C": 2,
                "H": 1,
                "O": 1
            },
        )
        assert len(the_gp) == prev_gp_len
    finally:
        # Remove output artifacts (presumably written by the trainer) even
        # on failure, so later runs start from a clean working directory.
        for f in glob("gp_from_aimd*"):
            remove(f)
예제 #2
0
def test_passive_learning():
    """
    Check passive learning from environments and from frames.

    Four scenarios are run, each on a fresh deep copy of one template GP:
    adding a list of atomic environments, adding all atoms of all frames,
    capping the total model size at 1, and applying per-species caps on the
    model together with a per-frame cap.

    :return: None
    """
    template_gp = GaussianProcess(
        kernel_name="2+3_mc",
        hyps=np.array([
            3.75996759e-06,
            1.53990678e-02,
            2.50624782e-05,
            5.07884426e-01,
            1.70172923e-03,
        ]),
        cutoffs=np.array([5, 3]),
        hyp_labels=["l2", "s2", "l3", "s3", "n0"],
        maxiter=1,
        opt_algorithm="L-BFGS-B",
    )

    frames_file = path.join(TEST_FILE_DIR, "methanol_frames.json")
    frames = Structure.from_file(frames_file)
    envs_file = path.join(TEST_FILE_DIR, "methanol_envs.json")
    envs = AtomicEnvironment.from_file(envs_file)

    model = deepcopy(template_gp)
    trainer = TrajectoryTrainer(frames=None, gp=model)

    def attach_fresh_gp():
        # Give the trainer an untouched copy of the template GP.
        fresh = deepcopy(template_gp)
        trainer.gp = fresh
        return fresh

    # Scenario 1: environments are added directly.
    expected_species = {Z_to_element(env.ctype) for env in envs}
    trainer.run_passive_learning(environments=envs,
                                 post_build_matrices=False)

    assert model.training_statistics["N"] == len(envs)
    assert set(model.training_statistics["species"]) == expected_species

    # Scenario 2: every atom of every frame is added.
    model = attach_fresh_gp()
    trainer.run_passive_learning(frames=frames, post_build_matrices=False)
    total_atoms = sum(map(len, frames))
    assert len(model.training_data) == total_atoms

    # Scenario 3: the model size is capped at a single entry.
    model = attach_fresh_gp()
    trainer.run_passive_learning(
        frames=frames,
        max_model_size=1,
        post_training_iterations=1,
    )
    assert len(model.training_data) == 1

    # Scenario 4: no oxygen, at most 1 carbon and 5 hydrogens in the model,
    # and at most 1 hydrogen taken from any single frame.
    model = attach_fresh_gp()
    species_caps = {
        "O": 0,
        "C": 1,
        "H": 5
    }
    trainer.run_passive_learning(
        frames=frames,
        max_model_elts=species_caps,
        max_elts_per_frame={"H": 1},
        post_build_matrices=False,
    )

    assert "O" not in model.training_statistics["species"]
    assert model.training_statistics["envs_by_species"]["C"] == 1
    assert model.training_statistics["envs_by_species"]["H"] == 5