Esempio n. 1
0
def test_set_L_alpha(two_body_gp, params):
    """Build a 2+3-body GP on one random structure and call ``set_L_alpha``.

    The ``two_body_gp`` and ``params`` arguments are accepted but are not
    read anywhere in this body.
    """
    # Random two-atom structure in a unit cubic cell.
    structure, forces = get_random_structure(np.eye(3), [2, 1], 2)

    # Five hyperparameter values and their five labels.
    hyp_labels = ['sig2', 'ls2', 'sig3', 'ls3', 'noise']
    hyps = np.array([
        2.23751151e-01,
        8.19990316e-01,
        1.28421842e-04,
        1.07467158e+00,
        5.50677932e-02,
    ])
    cutoffs = np.array([5.4, 5.4])

    # Positional arguments are kept in the original order:
    # kernel, kernel gradient, hyps, cutoffs, labels,
    # energy/force kernel, energy kernel, optimisation algorithm.
    gp_model = GaussianProcess(
        en.two_plus_three_body,
        en.two_plus_three_body_grad,
        hyps,
        cutoffs,
        hyp_labels,
        en.two_plus_three_force_en,
        en.two_plus_three_en,
        'BFGS',
        par=True,
        no_cpus=2,
    )

    # Populate the training set, then run the call under test.
    gp_model.update_db(structure, forces)
    gp_model.set_L_alpha()
Esempio n. 2
0
def two_body_gp() -> GaussianProcess:
    """Yield a GP built on ``en.three_body`` and trained on one structure.

    NOTE(review): the function name says "two body" but the kernel passed
    to the constructor is ``en.three_body`` -- confirm which is intended.

    The generator yields exactly once; the statements after the ``yield``
    only run when the generator is resumed.
    """
    print("\nSetting up...\n")

    # Random five-atom structure in a unit cubic cell.
    structure, forces = get_random_structure(np.eye(3), [2, 1], 5)

    gp_model = GaussianProcess(
        kernel=en.three_body,
        kernel_grad=en.three_body_grad,
        hyps=np.array([1, 1, 1]),
        hyp_labels=['Length', 'Signal Var.', 'Noise Var.'],
        par=True,
        no_cpus=2,
        cutoffs=np.array([0.8, 0.8]),
    )
    gp_model.update_db(structure, forces)

    yield gp_model

    # Reached only when the generator is resumed after the yield.
    print("\n\nTearing down\n")
    del gp_model
Esempio n. 3
0
def two_plus_three_gp() -> GaussianProcess:
    """Yield a GP with the "2+3_mc" kernel trained on one labelled structure.

    The model is built serially (``parallel=False``, ``n_cpus=1``) without
    multiple hyperparameter groups, then given a single random three-atom
    structure together with its forces and an energy label of 3.14.

    The generator yields exactly once; the local reference is dropped when
    the generator is resumed.
    """
    hyps = np.array([1.0, 1.0, 1.0, 1.0, 1.0])
    # Single definition of the cutoffs (the earlier duplicate assignment was
    # overwritten before use and has been removed).
    cutoffs = np.ones(2) * 0.8

    gp_model = GaussianProcess(
        kernel_name="2+3_mc",
        hyps=hyps,
        cutoffs=cutoffs,
        multihyps=False,
        parallel=False,
        n_cpus=1,
    )

    test_structure, forces = get_random_structure(np.eye(3), [1, 2], 3)
    energy = 3.14

    gp_model.update_db(test_structure, forces, energy=energy)

    yield gp_model
    del gp_model
Esempio n. 4
0
def three_body_gp() -> GaussianProcess:
    """Yield a multi-hyperparameter GP using the ``en.three_body_mc`` kernel.

    Hyperparameters and the hyperparameter mask come from ``generate_hm``;
    the model is trained on one random five-atom structure.

    The generator yields exactly once; the statements after the ``yield``
    only run when the generator is resumed.
    """
    print("\nSetting up...\n")

    # Called before the structure is generated, as in the original ordering.
    hyps, mask = generate_hm(0, 1)  # nbond=0, ntriplet=1

    # Random five-atom structure in a unit cubic cell.
    structure, forces = get_random_structure(np.eye(3), [2, 1], 5)

    gp_model = GaussianProcess(
        kernel=en.three_body_mc,
        kernel_grad=en.three_body_mc_grad,
        hyps=hyps,
        hyp_labels=mask['hyps_label'],
        cutoffs=np.array([0.8, 0.8]),
        multihyps=True,
        hyps_mask=mask,
    )
    gp_model.update_db(structure, forces)

    yield gp_model

    # Reached only when the generator is resumed after the yield.
    print("\n\nTearing down\n")
    del gp_model
Esempio n. 5
0
def test_training_statistics():
    """
    Check ``training_statistics`` before and after adding one structure
    (forces plus an energy label) to the GP database.
    :return:
    """
    n_atoms = 10
    structure, forces = get_random_structure(np.eye(3), ["H", "Be"], n_atoms)

    gp = GaussianProcess(kernel_name="2", cutoffs=[10])

    # Nothing has been added yet, so every statistic should be empty.
    stats = gp.training_statistics
    assert stats["N"] == 0
    assert len(stats["species"]) == 0
    assert len(stats["envs_by_species"]) == 0

    gp.update_db(structure, forces, energy=3.14)

    # One entry per atom, one species bucket per distinct coded species.
    stats = gp.training_statistics
    assert stats["N"] == n_atoms

    n_species = len(set(structure.coded_species))
    assert len(stats["species"]) == n_species
    assert len(stats["envs_by_species"]) == n_species
Esempio n. 6
0
def test_training_statistics():
    """
    Check ``training_statistics`` before and after adding one structure
    (forces only) to the GP database.
    :return:
    """
    n_atoms = 10
    structure, forces = get_random_structure(np.eye(3), ['H', 'Be'], n_atoms)

    gp = GaussianProcess(kernel_name='2', cutoffs=[10])

    # Nothing has been added yet, so every statistic should be empty.
    stats = gp.training_statistics
    assert stats['N'] == 0
    assert len(stats['species']) == 0
    assert len(stats['envs_by_species']) == 0

    gp.update_db(structure, forces)

    # One entry per atom, one species bucket per distinct coded species.
    stats = gp.training_statistics
    assert stats['N'] == n_atoms

    n_species = len(set(structure.coded_species))
    assert len(stats['species']) == n_species
    assert len(stats['envs_by_species']) == n_species
Esempio n. 7
0
def get_gp(
    bodies,
    kernel_type="mc",
    multihyps=True,
    cellabc=(1, 1, 1.5),
    force_only=False,
    noa=5,
) -> GaussianProcess:
    """Build a GP for the requested body orders, trained on one structure.

    Args:
        bodies: Value tested with ``in`` for "2"/"two" and "3"/"three" to
            decide whether a two-body and/or three-body group is requested
            from ``generate_hm``.
        kernel_type: Forwarded to ``GaussianProcess`` as ``component``.
        multihyps: Forwarded to ``generate_hm``.
        cellabc: Diagonal entries of the cell matrix (``np.diag(cellabc)``).
            The default is an immutable tuple so it cannot be shared and
            mutated across calls; lists are still accepted.
        force_only: When true, the structure is added with forces only;
            otherwise an energy label of 3.14 is added as well.
        noa: Third argument to ``get_random_structure``
            (presumably the number of atoms -- confirm against that helper).

    Returns:
        The ``GaussianProcess`` after ``update_db`` and ``check_L_alpha``.
    """
    print("\nSetting up...\n")

    # params
    cell = np.diag(cellabc)
    unique_species = [1, 2]

    ntwobody = 1 if ("2" in bodies or "two" in bodies) else 0
    nthreebody = 1 if ("3" in bodies or "three" in bodies) else 0

    # The mask returned by generate_hm also carries the cutoffs, kernel
    # names and hyperparameter labels used to build the model below.
    hyps, hm, _ = generate_hm(ntwobody,
                              nthreebody,
                              nmanybody=0,
                              multihyps=multihyps)
    cutoffs = hm["cutoffs"]
    kernels = hm["kernels"]
    hl = hm["hyp_labels"]

    # create test structure
    test_structure, forces = get_random_structure(cell, unique_species, noa)
    energy = 3.14

    gaussian = GaussianProcess(
        kernels=kernels,
        component=kernel_type,
        hyps=hyps,
        hyp_labels=hl,
        cutoffs=cutoffs,
        hyps_mask=hm,
        parallel=False,
        n_cpus=1,
    )

    if force_only:
        gaussian.update_db(test_structure, forces)
    else:
        gaussian.update_db(test_structure, forces, energy=energy)
    gaussian.check_L_alpha()

    return gaussian
Esempio n. 8
0
def two_plus_three_gp() -> GaussianProcess:
    """Yield a twobody+threebody GP trained on one labelled structure.

    The model is built serially without multiple hyperparameter groups and
    given one random three-atom structure with forces and an energy label.
    The generator yields exactly once; the local reference is dropped when
    the generator is resumed.
    """
    kernel_names = ['twobody', 'threebody']

    gp_model = GaussianProcess(
        kernels=kernel_names,
        hyps=np.ones(5),
        # Same 0.8 cutoff for every kernel.
        cutoffs={name: 0.8 for name in kernel_names},
        multihyps=False,
        parallel=False,
        n_cpus=1,
    )

    structure, forces = get_random_structure(np.eye(3), [1, 2], 3)
    gp_model.update_db(structure, forces, energy=3.14)

    yield gp_model
    del gp_model
Esempio n. 9
0
def test_to_from_gp():
    """
    Round-trip a GP through an RBCM (``from_gp`` then ``get_full_gp``)
    and check that the recovered GP gives identical predictions.
    :return:
    """
    original_gp = GaussianProcess()
    for frame in methanol_frames:
        original_gp.update_db(frame, forces=frame.forces)

    committee = RobustBayesianCommitteeMachine.from_gp(original_gp)
    recovered_gp = committee.get_full_gp()

    probe_env = methanol_envs[0]

    # Compare predictions for each of the components 1, 2 and 3.
    for component in (1, 2, 3):
        expected = original_gp.predict(probe_env, component)
        actual = recovered_gp.predict(probe_env, component)
        assert np.array_equal(expected, actual)
Esempio n. 10
0
def get_gp(bodies, kernel_type='mc', multihyps=True) -> GaussianProcess:
    """Build a GP named ``f'{bodies}{kernel_type}'`` trained on one structure.

    ``bodies`` is tested with ``in`` for '2'/'two' and '3'/'three' to decide
    how many bond and triplet groups are requested from ``generate_hm``.
    When ``multihyps`` is exactly ``False`` no hyperparameter mask is passed
    to the model. The returned model has already had ``update_db`` and
    ``check_L_alpha`` called on it.
    """
    print("\nSetting up...\n")

    nbond = 1 if '2' in bodies or 'two' in bodies else 0
    ntriplet = 1 if '3' in bodies or 'three' in bodies else 0

    # Called before the structure is generated, as in the original ordering.
    hyps, mask, _ = generate_hm(nbond, ntriplet, multihyps=multihyps)

    # Random five-atom structure in a cell with diagonal (1, 1, 1.5).
    structure, forces = get_random_structure(
        np.diag(np.array([1, 1, 1.5])), [2, 1], 5)

    # Labels are read from the mask before the mask is (possibly) dropped.
    labels = mask['hyps_label']
    hyps_mask = None if multihyps is False else mask

    gp_model = GaussianProcess(
        kernel_name=f'{bodies}{kernel_type}',
        hyps=hyps,
        hyp_labels=labels,
        cutoffs=np.array([0.8, 0.8]),
        multihyps=multihyps,
        hyps_mask=hyps_mask,
        parallel=False,
        n_cpus=1,
    )
    gp_model.update_db(structure, forces)
    gp_model.check_L_alpha()

    return gp_model
Esempio n. 11
0
def test_remove_force_data():
    """
    Train a GP on one random structure and record its predictions. Add a
    second structure (all atoms, then only atom 0) and check that the
    predictions change; remove the added data again and check that the
    predictions are exactly what they were at the start.
    :return:
    """
    struc_a, forces_a = get_random_structure(5.0 * np.eye(3), ["H", "Be"], 5)
    struc_b, forces_b = get_random_structure(5.0 * np.eye(3), ["H", "Be"], 5)

    gp = GaussianProcess(kernels=["twobody"], cutoffs={"twobody": 0.8})
    gp.update_db(struc_a, forces_a)

    # An index far outside the stored data must be rejected.
    with raises(ValueError):
        gp.remove_force_data(1000000)

    def predict(struc):
        # Predict without writing the results back onto the structure.
        return predict_on_structure(struc, gp, write_to_structure=False)

    base_forces_a, base_stds_a = predict(struc_a)
    base_forces_b, base_stds_b = predict(struc_b)

    # First pass adds the whole second structure, second pass only atom 0.
    for custom_range in [None, [0]]:

        # Adding data must change every prediction.
        gp.update_db(struc_b, forces_b, custom_range=custom_range)

        changed_forces_a, changed_stds_a = predict(struc_a)
        changed_forces_b, changed_stds_b = predict(struc_b)

        assert not np.array_equal(base_forces_a, changed_forces_a)
        assert not np.array_equal(base_forces_b, changed_forces_b)
        assert not np.array_equal(base_stds_a, changed_stds_a)
        assert not np.array_equal(base_stds_b, changed_stds_b)

        # The first structure occupies indices 0-4, so the added data
        # starts at index 5: one entry for the single-atom pass, five
        # entries for the whole-structure pass.
        to_remove = 5 if custom_range == [0] else [5, 6, 7, 8, 9]
        popped_strucs, popped_forces = gp.remove_force_data(to_remove)

        # What was removed must match what was added.
        for i, popped in enumerate(popped_forces):
            assert np.array_equal(popped, forces_b[i])
            assert np.array_equal(popped_strucs[i].structure.positions,
                                  struc_b.positions)

        # After removal the predictions revert to the baseline.
        restored_forces_a, restored_stds_a = predict(struc_a)
        restored_forces_b, restored_stds_b = predict(struc_b)

        assert np.array_equal(base_forces_a, restored_forces_a)
        assert np.array_equal(base_stds_a, restored_stds_a)

        assert np.array_equal(base_forces_b, restored_forces_b)
        assert np.array_equal(base_stds_b, restored_stds_b)
Esempio n. 12
0
def test_prediction():
    """
    Check RBCM predictions against a hand-transformed GP prediction.

    Per the original author's note: with a single expert the RBCM does
    *not* reduce to the plain GP prediction, because each expert's mean and
    variance are weighted in a way that does not yield 1 when there are no
    other experts. The test therefore applies that weighting to the GP's
    mean and variance by hand and compares the result with the RBCM output.
    :return:
    """
    prior_var = 0.1
    rbcm = RobustBayesianCommitteeMachine(
        ndata_per_expert=100,
        prior_variance=prior_var,
    )
    gp = GaussianProcess()

    # Feed the same ten environments to both models.
    for env in methanol_envs[:10]:
        rbcm.add_one_env(env, env.force)
        gp.add_one_env(env, env.force, train=False)

    # Then add the last frame to both.
    last_frame = methanol_frames[-1]
    gp.update_db(last_frame, forces=last_frame.forces)
    rbcm.update_db(last_frame, forces=last_frame.forces)
    probe_env = methanol_envs[-1]

    for d in (1, 2, 3):
        assert np.array_equal(gp.hyps, rbcm.hyps)
        rbcm_pred = rbcm.predict(probe_env, d)
        gp_pred = gp.predict(probe_env, d)

        # Rebuild the GP mean from the kernel vector and alpha.
        kernel_vec = get_kernel_vector(
            gp.name,
            gp.kernel,
            gp.energy_force_kernel,
            probe_env,
            d,
            gp.hyps,
            cutoffs=gp.cutoffs,
            hyps_mask=gp.hyps_mask,
            n_cpus=1,
            n_sample=gp.n_sample,
        )
        gp_mean = np.matmul(kernel_vec, gp.alpha)
        assert gp_mean == gp_pred[0]

        # NOTE(review): cutoffs are hard-coded here rather than taken from
        # gp.cutoffs -- confirm they match the default model's cutoffs.
        self_kernel = gp.kernel(
            env1=probe_env,
            env2=probe_env,
            d1=d,
            d2=d,
            hyps=gp.hyps,
            cutoffs=np.array((7, 3.5)),
        )

        # GP predictive variance: k(x, x) - k^T K^-1 k.
        kv_times_inv = np.matmul(kernel_vec.T, gp.ky_mat_inv)
        gp_var_i = self_kernel - np.matmul(kv_times_inv, kernel_vec)

        # Apply the single-expert weighting (arithmetic kept term-for-term
        # because the assertions below require exact equality).
        gp_beta = 0.5 * (np.log(prior_var) - np.log(gp_var_i))
        weighted_mean = gp_mean * gp_beta / gp_var_i
        precision = gp_beta / gp_var_i + (1 - gp_beta) / prior_var
        pred_var = 1.0 / precision
        pred_mean = pred_var * weighted_mean

        assert pred_mean == rbcm_pred[0]
        assert pred_var == rbcm_pred[1]
Esempio n. 13
0
class FlareCalc(FLARE_Calculator, MLPCalc):
    """Calculator combining ``FLARE_Calculator`` with the ``MLPCalc`` base.

    The GP model is created lazily by :meth:`init_flare` from the stored
    ``mlp_params``; atoms are converted to ``Structure`` objects using a
    species map built from the initial images.
    """

    implemented_properties = ["energy", "forces", "stress", "stds"]

    def __init__(self,
                 flare_params: dict,
                 initial_images,
                 mgp_model=None,
                 par=False,
                 use_mapping=False,
                 **kwargs):
        """Store the images, build the species map, then initialise both bases.

        ``flare_params`` is handed to ``MLPCalc.__init__`` as ``mlp_params``.
        The ``FLARE_Calculator`` initialiser receives ``None`` as its first
        positional argument, followed by the remaining options.
        """
        self.initial_images = initial_images
        self.init_species_map()
        MLPCalc.__init__(self, mlp_params=flare_params)
        super().__init__(
            None,
            mgp_model=mgp_model,
            par=par,
            use_mapping=use_mapping,
            **kwargs,
        )

    def init_flare(self):
        """Create a fresh ``GaussianProcess`` from ``self.mlp_params``."""
        self.gp_model = GaussianProcess(**self.mlp_params)

    def init_species_map(self):
        """Map each atomic number in the initial images to a dense index.

        Indices follow the sorted order of the distinct atomic numbers.
        """
        self.species_map = {}
        seen_numbers = []
        for image in self.initial_images:
            seen_numbers.extend(np.unique(image.numbers).tolist())
        for index, number in enumerate(np.unique(seen_numbers)):
            self.species_map[number] = index

    def calculate(self, atoms=None, properties=None, system_changes=...):
        """Run ``MLPCalc.calculate`` first, then the next ``calculate`` in the MRO.

        NOTE(review): the ``system_changes`` default is the literal
        ``Ellipsis`` and is forwarded unchanged -- confirm this is intended.
        """
        MLPCalc.calculate(
            self,
            atoms=atoms,
            properties=properties,
            system_changes=system_changes,
        )
        return super().calculate(
            atoms=atoms,
            properties=properties,
            system_changes=system_changes,
        )

    def calculate_gp(self, atoms):
        """Run the parent GP calculation and publish uncertainty entries.

        Copies ``stds`` and ``local_energy_stds`` from the results under the
        keys ``force_stds`` and ``energy_stds``, and records the energy
        uncertainty and the NaN-ignoring maximum force uncertainty in
        ``atoms.info``.
        """
        structure = self.get_descriptor_from_atoms(atoms)
        super().calculate_gp(structure)

        results = self.results
        results["force_stds"] = results["stds"]
        local_energy_stds = results["local_energy_stds"]
        results["energy_stds"] = local_energy_stds
        atoms.info["energy_stds"] = local_energy_stds
        atoms.info["max_force_stds"] = np.nanmax(results["force_stds"])

    def train(self, parent_dataset, new_dataset=None):
        """Train incrementally when possible, otherwise from scratch.

        If a GP model exists and ``new_dataset`` is non-empty, only the new
        data is added. Otherwise the GP is re-created and trained on
        ``parent_dataset``.
        """
        if self.gp_model and new_dataset:
            self.train_on_dataset(new_dataset)
            return
        self.init_flare()
        self.train_on_dataset(parent_dataset)

    def train_on_dataset(self, dataset):
        """Add every atoms object in ``dataset`` to the GP database."""
        for atoms in dataset:
            # Energy and forces are queried once for the descriptor and
            # once more for the database update.
            descriptor = self.get_descriptor_from_atoms(
                atoms,
                energy=atoms.get_potential_energy(),
                forces=atoms.get_forces(),
            )
            self.gp_model.update_db(
                struc=descriptor,
                forces=atoms.get_forces(),
                energy=atoms.get_potential_energy(),
            )

    def get_descriptor_from_atoms(self, atoms, energy=None, forces=None):
        """Convert an atoms object into a ``Structure``.

        Atomic numbers are translated through ``self.species_map``; the
        optional ``energy`` and ``forces`` labels are passed through.
        """
        return Structure(
            cell=atoms.get_cell(),
            species=[
                self.species_map[number]
                for number in atoms.get_atomic_numbers()
            ],
            positions=atoms.get_positions(),
            forces=forces,
            energy=energy,
        )