Example #1
    def from_dict(dictionary):
        """Create GP object from dictionary representation."""

        multihyps = dictionary.get('multihyps', False)

        new_gp = GaussianProcess(kernel_name=dictionary['kernel_name'],
                                 cutoffs=np.array(dictionary['cutoffs']),
                                 hyps=np.array(dictionary['hyps']),
                                 hyp_labels=dictionary['hyp_labels'],
                                 parallel=dictionary.get('parallel', False) or
                                          dictionary.get('par', False),
                                 per_atom_par=dictionary.get('per_atom_par',
                                                             True),
                                 n_cpus=dictionary.get(
                                     'n_cpus') or dictionary.get('no_cpus'),
                                 maxiter=dictionary['maxiter'],
                                 opt_algorithm=dictionary.get(
                                     'opt_algorithm', 'L-BFGS-B'),
                                 multihyps=multihyps,
                                 hyps_mask=dictionary.get('hyps_mask', None),
                                 name=dictionary.get('name', 'default_gp')
                                 )

        # Reconstruct the training set from the stored environments and labels
        new_gp.training_data = [AtomicEnvironment.from_dict(env) for env in
                                dictionary['training_data']]
        new_gp.training_labels = deepcopy(dictionary['training_labels'])
        new_gp.training_labels_np = deepcopy(dictionary['training_labels_np'])

        new_gp.likelihood = dictionary['likelihood']
        new_gp.likelihood_gradient = dictionary['likelihood_gradient']
        new_gp.training_labels_np = np.hstack(new_gp.training_labels)

        _global_training_data[new_gp.name] = new_gp.training_data
        _global_training_labels[new_gp.name] = new_gp.training_labels_np

        # Save time by attempting to load in computed attributes
        if len(new_gp.training_data) > 5000:
            try:
                new_gp.ky_mat = np.load(dictionary['ky_mat_file'])
                new_gp.compute_matrices()
            except (KeyError, FileNotFoundError):
                new_gp.ky_mat = None
                new_gp.l_mat = None
                new_gp.alpha = None
                new_gp.ky_mat_inv = None
                filename = dictionary.get('ky_mat_file')
                # `warnings` must be imported at module level.
                warnings.warn("The covariance matrices were not loaded "
                              f"because {filename} cannot be found.")
        else:
            new_gp.ky_mat_inv = np.array(dictionary['ky_mat_inv']) \
                if dictionary.get('ky_mat_inv') is not None else None
            new_gp.ky_mat = np.array(dictionary['ky_mat']) \
                if dictionary.get('ky_mat') is not None else None
            new_gp.l_mat = np.array(dictionary['l_mat']) \
                if dictionary.get('l_mat') is not None else None
            new_gp.alpha = np.array(dictionary['alpha']) \
                if dictionary.get('alpha') is not None else None

        return new_gp
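
For context, a minimal round-trip sketch of how a deserializer like this is typically paired with a matching serializer. It assumes an `as_dict()` counterpart on GaussianProcess that emits the keys `from_dict()` reads back, and it assumes the `flare.gp`, `flare.env`, and `flare.struc` import paths; treat those as assumptions rather than guarantees.

import numpy as np
from flare.gp import GaussianProcess          # import paths are assumptions
from flare.env import AtomicEnvironment
from flare.struc import Structure

# Small GP with the same hyperparameter layout used throughout these tests.
gp = GaussianProcess(kernel_name="2+3_mc",
                     hyps=np.array([1e-3, 1e-2, 1e-3, 1e-1, 1e-3]),
                     cutoffs=np.array([1, 1]),
                     hyp_labels=["l2", "s2", "l3", "s3", "n0"])

# Seed one environment so the training arrays are non-empty before dumping.
cell = np.eye(3)
struc = Structure(cell, [1, 2, 3],
                  np.array([[0, 0, 0], [0.5, 0.5, 0.5], [0.1, 0.1, 0.1]]))
env = AtomicEnvironment(struc, 0, np.array([1, 1]))
gp.add_one_env(env, np.array([0.1, 0.2, 0.3]))
gp.set_L_alpha()

gp_dict = gp.as_dict()                        # assumed counterpart to from_dict
gp_copy = GaussianProcess.from_dict(gp_dict)  # re-registers the training set
assert np.array_equal(gp.hyps, gp_copy.hyps)
assert np.array_equal(gp.ky_mat, gp_copy.ky_mat)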
Example #2
def test_backwards_compatibility(structure, mask, cutoff, result):
    """
    This test can be deleted if backwards compatibility is dropped for the
    sake of code cleanup. (This test executes in about 5 milliseconds). Tests a
    particular branch of code within the Environment's as_dict() method for
    older pickled environments without a cutoffs mask.
    :return:
    """
    if mask is True:
        mask = generate_mask(cutoff)
    else:
        mask = None

    env_test = deepcopy(
        AtomicEnvironment(structure, atom=0, cutoffs=cutoff,
                          cutoffs_mask=mask))
    pre_test_dict = env_test.as_dict()

    delattr(env_test, "cutoffs_mask")

    test_dict = env_test.as_dict()

    assert pre_test_dict["cutoffs_mask"] == test_dict["cutoffs_mask"]

    new_env = AtomicEnvironment.from_dict(test_dict)

    assert isinstance(new_env, AtomicEnvironment)

    assert str(new_env) == str(env_test)
Example #3
def test_env_methods(structure, mask, cutoff, result):
    if mask is True:
        mask = generate_mask(cutoff)
    else:
        mask = None

    env_test = AtomicEnvironment(structure,
                                 atom=0,
                                 cutoffs=cutoff,
                                 cutoffs_mask=mask)

    assert str(env_test) == \
        f'Atomic Env. of Type 1 surrounded by {result[0]} atoms' \
        ' of Types [1, 2, 3]'

    the_dict = env_test.as_dict()
    assert isinstance(the_dict, dict)
    for key in ['positions', 'cell', 'atom', 'cutoffs', 'species']:
        assert key in the_dict.keys()

    remade_env = AtomicEnvironment.from_dict(the_dict)
    assert isinstance(remade_env, AtomicEnvironment)

    assert np.array_equal(remade_env.bond_array_2, env_test.bond_array_2)
    if len(cutoff) > 1:
        assert np.array_equal(remade_env.bond_array_3, env_test.bond_array_3)
    if len(cutoff) > 2:
        assert np.array_equal(remade_env.q_array, env_test.q_array)
Example #4
def methanol_gp():
    the_gp = GaussianProcess(
        kernel_name="2+3_mc",
        hyps=np.array([
            3.75996759e-06,
            1.53990678e-02,
            2.50624782e-05,
            5.07884426e-01,
            1.70172923e-03,
        ]),
        cutoffs=np.array([5, 3]),
        hyp_labels=["l2", "s2", "l3", "s3", "n0"],
        maxiter=1,
        opt_algorithm="L-BFGS-B",
    )

    with open(path.join(TEST_FILE_DIR, "methanol_envs.json"), "r") as f:
        dicts = [loads(s) for s in f.readlines()]

    for cur_dict in dicts:
        force = cur_dict["forces"]
        env = AtomicEnvironment.from_dict(cur_dict)
        the_gp.add_one_env(env, force)

    the_gp.set_L_alpha()

    return the_gp
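
A short sketch of how a constructor like `methanol_gp()` above is typically consumed in the surrounding test module; wrapping it in a pytest fixture is an assumption here, not something the example shows.

import pytest

@pytest.fixture(scope="module")
def methanol_gp_fixture():
    # Build the seeded GP once and share it across the module's tests.
    return methanol_gp()

def test_methanol_gp_is_seeded(methanol_gp_fixture):
    gp = methanol_gp_fixture
    assert len(gp.training_data) > 0   # environments added via add_one_env
    assert gp.alpha is not None        # set_L_alpha() computed the alpha vector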
Example #5
def test_pred_on_elements():
    the_gp = GaussianProcess(kernel_name="2+3_mc",
                             hyps=np.array([
                                 3.75996759e-06, 1.53990678e-02,
                                 2.50624782e-05, 5.07884426e-01, 1.70172923e-03
                             ]),
                             cutoffs=np.array([7, 3]),
                             hyp_labels=['l2', 's2', 'l3', 's3', 'n0'],
                             maxiter=1,
                             opt_algorithm='L-BFGS-B')

    with open('./test_files/methanol_frames.json', 'r') as f:
        frames = [Structure.from_dict(loads(s)) for s in f.readlines()]

    with open('./test_files/methanol_envs.json', 'r') as f:
        data_dicts = [loads(s) for s in f.readlines()[:6]]
        envs = [AtomicEnvironment.from_dict(d) for d in data_dicts]
        forces = [np.array(d['forces']) for d in data_dicts]
        seeds = list(zip(envs, forces))

    all_frames = deepcopy(frames)
    tt = TrajectoryTrainer(frames,
                           gp=the_gp,
                           shuffle_frames=False,
                           rel_std_tolerance=0,
                           abs_std_tolerance=0,
                           abs_force_tolerance=.001,
                           skip=5,
                           min_atoms_per_train=100,
                           pre_train_seed_envs=seeds,
                           pre_train_seed_frames=[frames[-1]],
                           max_atoms_from_frame=4,
                           output_name='meth_test',
                           model_format='json',
                           atom_checkpoint_interval=50,
                           pre_train_atoms_per_element={'H': 1},
                           predict_atoms_per_element={
                               'H': 0,
                               'C': 1,
                               'O': 0
                           })
    # Set to predict only on Carbon after training on H to ensure errors are
    #  high and that they get added to the gp
    tt.run()

    # Ensure forces weren't written directly to structure
    for i in range(len(all_frames)):
        assert np.array_equal(all_frames[i].forces, frames[i].forces)

    # Assert that Carbon atoms were correctly added
    assert the_gp.training_statistics['envs_by_species']['C'] > 2

    for f in glob(f"meth_test*"):
        remove(f)

    for f in glob(f"gp_from_aimd*"):
        remove(f)
Example #6
def test_seed_and_run():
    the_gp = GaussianProcess(
        kernel_name="2+3_mc",
        hyps=np.array([
            3.75996759e-06,
            1.53990678e-02,
            2.50624782e-05,
            5.07884426e-01,
            1.70172923e-03,
        ]),
        cutoffs=np.array([5, 3]),
        hyp_labels=["l2", "s2", "l3", "s3", "n0"],
        maxiter=1,
        opt_algorithm="L-BFGS-B",
    )

    with open(path.join(TEST_FILE_DIR, "methanol_frames.json"), "r") as f:
        frames = [Structure.from_dict(loads(s)) for s in f.readlines()]

    with open(path.join(TEST_FILE_DIR, "methanol_envs.json"), "r") as f:
        data_dicts = [loads(s) for s in f.readlines()[:6]]
        envs = [AtomicEnvironment.from_dict(d) for d in data_dicts]
        forces = [np.array(d["forces"]) for d in data_dicts]
        seeds = list(zip(envs, forces))

    tt = TrajectoryTrainer(
        frames,
        gp=the_gp,
        shuffle_frames=True,
        rel_std_tolerance=0,
        abs_std_tolerance=0,
        skip=10,
        pre_train_seed_envs=seeds,
        pre_train_seed_frames=[frames[-1]],
        max_atoms_from_frame=4,
        output_name="meth_test",
        model_format="pickle",
        train_checkpoint_interval=1,
        pre_train_atoms_per_element={"H": 1},
    )

    tt.run()

    with open("meth_test_model.pickle", "rb") as f:
        new_gp = pickle.load(f)

    test_env = envs[0]

    for d in [1, 2, 3]:
        assert np.all(
            the_gp.predict(x_t=test_env, d=d) == new_gp.predict(x_t=test_env,
                                                                d=d))

    for f in glob(f"meth_test*"):
        remove(f)
Example #7
def test_seed_and_run():
    the_gp = GaussianProcess(kernel=two_plus_three_body_mc,
                             kernel_grad=two_plus_three_body_mc_grad,
                             hyps=np.array([
                                 3.75996759e-06, 1.53990678e-02,
                                 2.50624782e-05, 5.07884426e-01, 1.70172923e-03
                             ]),
                             cutoffs=np.array([7, 7]),
                             hyp_labels=['l2', 's2', 'l3', 's3', 'n0'],
                             maxiter=1,
                             opt_algorithm='L-BFGS-B')

    with open('./test_files/methanol_frames.json', 'r') as f:
        frames = [Structure.from_dict(loads(s)) for s in f.readlines()]

    with open('./test_files/methanol_envs.json', 'r') as f:
        data_dicts = [loads(s) for s in f.readlines()[:6]]
        envs = [AtomicEnvironment.from_dict(d) for d in data_dicts]
        forces = [np.array(d['forces']) for d in data_dicts]
        seeds = list(zip(envs, forces))

    tt = TrajectoryTrainer(frames,
                           gp=the_gp,
                           shuffle_frames=True,
                           rel_std_tolerance=0,
                           abs_std_tolerance=0,
                           skip=15,
                           pre_train_seed_envs=seeds,
                           pre_train_seed_frames=[frames[-1]],
                           max_atoms_from_frame=4,
                           model_write='meth_test.pickle',
                           model_format='pickle',
                           checkpoint_interval=1,
                           pre_train_atoms_per_element={'H': 1})

    tt.run()

    with open('meth_test.pickle', 'rb') as f:
        new_gp = pickle.load(f)

    test_env = envs[0]

    for d in [1, 2, 3]:
        assert np.all(
            the_gp.predict(x_t=test_env, d=d) == new_gp.predict(x_t=test_env,
                                                                d=d))

    os.system('rm ./gp_from_aimd.out')
    os.system('rm ./gp_from_aimd.xyz')
    os.system('rm ./gp_from_aimd-f.xyz')
    os.system('rm ./meth_test.pickle')
Example #8
    def from_dict(dictionary):
        """Create GP object from dictionary representation."""

        if 'mc' in dictionary['kernel_name']:
            force_kernel, grad = \
                str_to_mc_kernel(dictionary['kernel_name'], include_grad=True)
        else:
            force_kernel, grad = str_to_kernel(dictionary['kernel_name'],
                                               include_grad=True)

        if dictionary['energy_kernel'] is not None:
            energy_kernel = str_to_kernel(dictionary['energy_kernel'])
        else:
            energy_kernel = None

        if dictionary['energy_force_kernel'] is not None:
            energy_force_kernel = \
                str_to_kernel(dictionary['energy_force_kernel'])
        else:
            energy_force_kernel = None

        new_gp = GaussianProcess(kernel=force_kernel,
                                 kernel_grad=grad,
                                 energy_kernel=energy_kernel,
                                 energy_force_kernel=energy_force_kernel,
                                 cutoffs=np.array(dictionary['cutoffs']),
                                 hyps=np.array(dictionary['hyps']),
                                 hyp_labels=dictionary['hyp_labels'],
                                 par=dictionary['par'],
                                 no_cpus=dictionary['no_cpus'],
                                 maxiter=dictionary['maxiter'],
                                 opt_algorithm=dictionary['algo'])

        # Save time by attempting to load in computed attributes
        new_gp.l_mat = np.array(dictionary['l_mat']) \
            if dictionary.get('l_mat') is not None else None
        new_gp.l_mat_inv = np.array(dictionary['l_mat_inv']) \
            if dictionary.get('l_mat_inv') is not None else None
        new_gp.alpha = np.array(dictionary['alpha']) \
            if dictionary.get('alpha') is not None else None
        new_gp.ky_mat = np.array(dictionary['ky_mat']) \
            if dictionary.get('ky_mat') is not None else None
        new_gp.ky_mat_inv = np.array(dictionary['ky_mat_inv']) \
            if dictionary.get('ky_mat_inv') is not None else None

        new_gp.training_data = [
            AtomicEnvironment.from_dict(env)
            for env in dictionary['training_data']
        ]
        new_gp.training_labels = dictionary['training_labels']

        new_gp.likelihood = dictionary['likelihood']
        new_gp.likelihood_gradient = dictionary['likelihood_gradient']
        new_gp.training_labels_np = np.hstack(new_gp.training_labels)
        return new_gp
Example #9
def test_env_methods(structure, mask, cutoff, result):
    if mask is True:
        mask = generate_mask(cutoff)
    else:
        mask = None

    structure = deepcopy(structure)
    structure.forces = np.random.random(size=(len(structure), 3))

    env_test = AtomicEnvironment(structure,
                                 atom=0,
                                 cutoffs=cutoff,
                                 cutoffs_mask=mask)

    assert np.array_equal(structure.forces[0], env_test.force)

    assert (
        str(env_test) ==
        f"Atomic Env. of Type 1 surrounded by {result[0]} atoms of Types [1, 2, 3]"
    )

    the_dict = env_test.as_dict()
    assert isinstance(the_dict, dict)
    the_str = env_test.as_str()
    assert dumps(the_dict, cls=NumpyEncoder) == the_str
    for key in ["positions", "cell", "atom", "cutoffs", "species"]:
        assert key in the_dict.keys()

    # This saves a few seconds, the masked envs take longer to read/write
    if not mask:
        with open("test_environment.json", "w") as f:
            f.write(env_test.as_str())
        remade_env = AtomicEnvironment.from_file("test_environment.json")
    else:
        remade_env = AtomicEnvironment.from_dict(the_dict)

    assert isinstance(remade_env, AtomicEnvironment)

    assert np.array_equal(remade_env.bond_array_2, env_test.bond_array_2)
    if len(cutoff) > 1:
        assert np.array_equal(remade_env.bond_array_3, env_test.bond_array_3)
    if len(cutoff) > 2:
        assert np.array_equal(remade_env.q_array, env_test.q_array)

    if not mask:
        remove("test_environment.json")
Example #10
def test_env_methods(cutoff):
    cell = np.eye(3)
    species = [1, 2, 3]
    positions = np.array([[0, 0, 0], [0.5, 0.5, 0.5], [0.1, 0.1, 0.1]])
    struc_test = Structure(cell, species, positions)
    env_test = AtomicEnvironment(struc_test, 0, np.array([1, 1]))
    assert str(env_test) == 'Atomic Env. of Type 1 surrounded by 12 atoms' \
                            ' of Types [2, 3]'

    the_dict = env_test.as_dict()
    assert isinstance(the_dict, dict)
    for key in ['positions', 'cell', 'atom', 'cutoffs', 'species']:
        assert key in the_dict.keys()

    remade_env = AtomicEnvironment.from_dict(the_dict)
    assert isinstance(remade_env, AtomicEnvironment)

    assert np.array_equal(remade_env.bond_array_2, env_test.bond_array_2)
    assert np.array_equal(remade_env.bond_array_3, env_test.bond_array_3)
    assert np.array_equal(remade_env.bond_array_mb, env_test.bond_array_mb)
Example #11
def methanol_gp():
    the_gp = GaussianProcess(kernel_name="2+3_mc",
                             hyps=np.array([
                                 3.75996759e-06, 1.53990678e-02,
                                 2.50624782e-05, 5.07884426e-01, 1.70172923e-03
                             ]),
                             cutoffs=np.array([7, 7]),
                             hyp_labels=['l2', 's2', 'l3', 's3', 'n0'],
                             maxiter=1,
                             opt_algorithm='L-BFGS-B')
    with open('./test_files/methanol_envs.json') as f:
        dicts = [loads(s) for s in f.readlines()]

    for cur_dict in dicts:
        force = cur_dict['forces']
        env = AtomicEnvironment.from_dict(cur_dict)
        the_gp.add_one_env(env, force)

    the_gp.set_L_alpha()

    return the_gp
Example #12
    def from_dict(dictionary):
        """Create GP object from dictionary representation."""

        GaussianProcess.backward_arguments(dictionary, dictionary)
        GaussianProcess.backward_attributes(dictionary)

        new_gp = GaussianProcess(**dictionary)

        # Save time by attempting to load in computed attributes
        if "training_data" in dictionary:
            new_gp.training_data = [
                AtomicEnvironment.from_dict(env)
                for env in dictionary["training_data"]
            ]
            new_gp.training_labels = deepcopy(dictionary["training_labels"])
            new_gp.training_labels_np = deepcopy(
                dictionary["training_labels_np"])
            new_gp.sync_data()

        # Reconstruct training structures.
        if "training_structures" in dictionary:
            new_gp.training_structures = []
            for n, env_list in enumerate(dictionary["training_structures"]):
                new_gp.training_structures.append([])
                for env_curr in env_list:
                    new_gp.training_structures[n].append(
                        AtomicEnvironment.from_dict(env_curr))
            new_gp.energy_labels = deepcopy(dictionary["energy_labels"])
            new_gp.energy_labels_np = deepcopy(dictionary["energy_labels_np"])
            new_gp.sync_data()

        new_gp.all_labels = np.concatenate(
            (new_gp.training_labels_np, new_gp.energy_labels_np))

        new_gp.likelihood = dictionary.get("likelihood", None)
        new_gp.likelihood_gradient = dictionary.get("likelihood_gradient",
                                                    None)

        new_gp.n_envs_prev = len(new_gp.training_data)

        # Save time by attempting to load in computed attributes
        if dictionary.get("ky_mat_file"):
            try:
                new_gp.ky_mat = np.load(dictionary["ky_mat_file"])
                new_gp.compute_matrices()
                new_gp.ky_mat_file = None

            except FileNotFoundError:
                new_gp.ky_mat = None
                new_gp.l_mat = None
                new_gp.alpha = None
                new_gp.ky_mat_inv = None
                filename = dictionary.get("ky_mat_file")
                logger = logging.getLogger(new_gp.logger_name)
                logger.warning("the covariance matrices are not loaded"
                               f"because {filename} cannot be found")
        else:
            new_gp.ky_mat = (np.array(dictionary["ky_mat"])
                             if dictionary.get("ky_mat") is not None else None)
            new_gp.ky_mat_inv = (np.array(dictionary["ky_mat_inv"])
                                 if dictionary.get("ky_mat_inv") is not None
                                 else None)
            new_gp.ky_mat = (np.array(dictionary["ky_mat"])
                             if dictionary.get("ky_mat") is not None else None)
            new_gp.l_mat = (np.array(dictionary["l_mat"])
                            if dictionary.get("l_mat") is not None else None)
            new_gp.alpha = (np.array(dictionary["alpha"])
                            if dictionary.get("alpha") is not None else None)

        return new_gp
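
A sketch of the `ky_mat_file` handoff that the branch above expects: at write time the covariance matrix is stored as a separate .npy file and only its path goes into the dictionary, which `from_dict()` then reloads and expands through `compute_matrices()`. The file name and the explicit `as_dict()`/key manipulation are illustrative assumptions; the real serializer may handle this automatically.

import numpy as np

# Write side (illustrative): park the covariance matrix in its own file.
# Assumes `gp` is an existing, trained GaussianProcess instance.
np.save("my_gp_ky_mat.npy", gp.ky_mat)
gp_dict = gp.as_dict()                        # assumed counterpart to from_dict
gp_dict["ky_mat_file"] = "my_gp_ky_mat.npy"   # keep the path, not the matrix
gp_dict.pop("ky_mat", None)

# Read side: from_dict() reloads ky_mat from the file and rebuilds
# l_mat, alpha, and ky_mat_inv via compute_matrices().
restored = GaussianProcess.from_dict(gp_dict)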
Example #13
def test_pred_on_elements():
    the_gp = GaussianProcess(
        kernel_name="2+3_mc",
        hyps=np.array([
            3.75996759e-06,
            1.53990678e-02,
            2.50624782e-05,
            5.07884426e-01,
            1.70172923e-03,
        ]),
        cutoffs=np.array([5, 3]),
        hyp_labels=["l2", "s2", "l3", "s3", "n0"],
        maxiter=1,
        opt_algorithm="L-BFGS-B",
    )

    with open(path.join(TEST_FILE_DIR, "methanol_frames.json"), "r") as f:
        frames = [Structure.from_dict(loads(s)) for s in f.readlines()]

    with open(path.join(TEST_FILE_DIR, "methanol_envs.json"), "r") as f:
        data_dicts = [loads(s) for s in f.readlines()[:6]]
        envs = [AtomicEnvironment.from_dict(d) for d in data_dicts]
        forces = [np.array(d["forces"]) for d in data_dicts]
        seeds = list(zip(envs, forces))

    all_frames = deepcopy(frames)
    tt = TrajectoryTrainer(
        frames,
        gp=the_gp,
        shuffle_frames=False,
        rel_std_tolerance=0,
        abs_std_tolerance=0,
        abs_force_tolerance=0.001,
        skip=5,
        min_atoms_per_train=100,
        pre_train_seed_envs=seeds,
        pre_train_seed_frames=[frames[-1]],
        max_atoms_from_frame=4,
        output_name="meth_test",
        print_as_xyz=True,
        model_format="json",
        atom_checkpoint_interval=50,
        pre_train_atoms_per_element={"H": 1},
        predict_atoms_per_element={
            "H": 0,
            "C": 1,
            "O": 0
        },
    )
    # Set to predict only on Carbon after training on H to ensure errors are
    #  high and that they get added to the gp
    tt.run()

    # Ensure forces weren't written directly to structure
    for i in range(len(all_frames)):
        assert np.array_equal(all_frames[i].forces, frames[i].forces)

    # Assert that Carbon atoms were correctly added
    assert the_gp.training_statistics["envs_by_species"]["C"] > 2

    for f in glob(f"meth_test*"):
        remove(f)

    for f in glob(f"gp_from_aimd*"):
        remove(f)
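
With `model_format="json"`, the trainer above is expected to leave a JSON model file behind; the name below follows the `meth_test_model.pickle` pattern from the pickle-based test and is therefore an assumption. Loading goes back through the dictionary path of the `from_dict()` examples.

from json import loads
from flare.gp import GaussianProcess  # import path is an assumption

with open("meth_test_model.json", "r") as f:   # name inferred by analogy with
    model_dict = loads(f.read())               # "meth_test_model.pickle"
new_gp = GaussianProcess.from_dict(model_dict)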
Example #14
    def from_dict(dictionary):
        """Create GP object from dictionary representation."""

        multihyps = dictionary.get('multihyps', False)

        force_kernel, grad = str_to_kernels(dictionary['kernel_name'],
                                            multihyps,
                                            include_grad=True)

        if dictionary['energy_kernel'] is not None:
            energy_kernel = str_to_kernel(dictionary['energy_kernel'],
                                          multihyps)
        else:
            energy_kernel = None

        if dictionary['energy_force_kernel'] is not None:
            energy_force_kernel = \
                str_to_kernel(dictionary['energy_force_kernel'],
                              multihyps)
        else:
            energy_force_kernel = None

        new_gp = GaussianProcess(kernel=force_kernel,
                                 kernel_grad=grad,
                                 energy_kernel=energy_kernel,
                                 energy_force_kernel=energy_force_kernel,
                                 cutoffs=np.array(dictionary['cutoffs']),
                                 hyps=np.array(dictionary['hyps']),
                                 hyp_labels=dictionary['hyp_labels'],
                                 par=dictionary['par'],
                                 per_atom_par=dictionary.get('per_atom_par', True),
                                 n_cpus=dictionary.get('n_cpus') or dictionary.get('no_cpus'),
                                 maxiter=dictionary['maxiter'],
                                 opt_algorithm=dictionary['algo'],
                                 multihyps=multihyps,
                                 hyps_mask=dictionary.get('hyps_mask', None)
                                 )

        new_gp.training_data = [AtomicEnvironment.from_dict(env) for env in
                                dictionary['training_data']]
        new_gp.training_labels = deepcopy(dictionary['training_labels'])
        new_gp.training_labels_np = deepcopy(dictionary['training_labels_np'])

        new_gp.likelihood = dictionary['likelihood']
        new_gp.likelihood_gradient = dictionary['likelihood_gradient']
        new_gp.training_labels_np = np.hstack(new_gp.training_labels)

        # Save time by attempting to load in computed attributes
        if len(new_gp.training_data) > 5000:
            new_gp.ky_mat = np.load(dictionary['ky_mat_file'])
            new_gp.compute_matrices()
        else:
            new_gp.ky_mat_inv = np.array(dictionary['ky_mat_inv']) \
                if dictionary.get('ky_mat_inv') is not None else None
            new_gp.ky_mat = np.array(dictionary['ky_mat']) \
                if dictionary.get('ky_mat') is not None else None
            new_gp.l_mat = np.array(dictionary['l_mat']) \
                if dictionary.get('l_mat') is not None else None
            new_gp.alpha = np.array(dictionary['alpha']) \
                if dictionary.get('alpha') is not None else None
        return new_gp