Example #1
0
def test_fit_1():
    """
    Check the first fitting workflow.

    Compounds are built from xyz files, the energies are stored in the
    estimator, the representation is generated, and fit is called with the
    indices of the samples to train on. The workflow is repeated for every
    representation name listed in the loop below.
    """
    here = os.path.dirname(os.path.realpath(__file__))

    xyz_files = sorted(glob.glob(here + "/CN_isobutane/*.xyz"))
    all_energies = np.loadtxt(here + '/CN_isobutane/prop_kjmol_training.txt',
                              usecols=[1])

    for rep_name in ('sorted_coulomb_matrix', 'unsorted_coulomb_matrix',
                     'bag_of_bonds', 'slatm'):
        estimator = MRMP(representation=rep_name)

        # Only the first 100 files / energies are used.
        estimator.generate_compounds(xyz_files[:100])
        estimator.set_properties(all_energies[:100])
        estimator.generate_representation()

        sample_indices = np.arange(0, 100)
        estimator.fit(sample_indices)
Example #2
0
def test_fit_3():
    """
    Check the third fitting workflow: the representation and the energies
    are handed directly to the fit method.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    archive = np.load(here + "/data/CN_isopent_light_UCM.npz")

    # The archive holds the representation first and the energies second.
    representation, targets = archive["arr_0"], archive["arr_1"]

    estimator = MRMP()
    estimator.fit(representation, targets)
Example #3
0
def test_set_properties():
    """
    Check that the MRMP.set_properties method works.

    Before the call the ``properties`` attribute of a new estimator is
    expected to be None; after the call it must equal the energies that were
    passed in, element by element.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    energies = np.loadtxt(test_dir + '/CN_isobutane/prop_kjmol_training.txt',
                          usecols=[1])

    estimator = MRMP(representation='unsorted_coulomb_matrix')

    # Identity check on purpose: `== None` against an array would compare
    # element-wise instead of testing for "not set".
    assert estimator.properties is None

    estimator.set_properties(energies)

    assert np.all(estimator.properties == energies)
Example #4
0
def test_load_external():
    """
    Check that a model trained on another computer can be loaded and used
    on this one.

    The model stored in "saved_model" is loaded and scored on y = x**2; the
    score has to be close to the value recorded on the other machine.
    """
    grid = np.linspace(-10.0, 10.0, 2000)
    targets = grid**2
    inputs = grid.reshape(grid.shape[0], 1)

    estimator = MRMP()
    estimator.load_nn("saved_model")

    # Reference score obtained on the other machine.
    reference_score = -24.101043
    loaded_score = estimator.score(inputs, targets)

    assert np.isclose(loaded_score, reference_score)
Example #5
0
def test_fit1():
    """Check that the neural net can overfit a cubic function."""

    # Training data: y = x**3 on [-2, 2], inputs as a single-column matrix.
    x_train = np.linspace(-2.0, 2.0, 200)
    y_train = x_train ** 3
    X_train = x_train.reshape(len(x_train), 1)

    estimator = MRMP(hidden_layer_sizes=(5, 5, 5), learning_rate=0.01, iterations=35000)
    estimator.fit(X_train, y_train)

    # Evaluation points lie inside the training interval.
    x_eval = np.linspace(-1.5, 1.5, 15)
    y_eval = x_eval ** 3
    predictions = estimator.predict(x_eval.reshape(len(x_eval), 1))

    # Flatten the predictions to one value per sample before comparing.
    flat_predictions = np.reshape(predictions, (predictions.shape[0],))
    np.testing.assert_array_almost_equal(y_eval, flat_predictions, decimal=1)
Example #6
0
def test_get_batch_size():
    """
    Check MRMP._get_batch_size for several batch_size settings.

    For each case the estimator is created with the given ``batch_size``,
    ``n_samples`` is assigned by hand, and the returned batch size is
    compared with the expected one.
    :return:
    """

    # (n_samples, batch_size argument, expected batch size)
    cases = [
        (200, "auto", 100),
        (50, 100, 50),
        (50, 20, 17),
    ]

    computed = []
    for n_samples, batch_size, _ in cases:
        estimator = MRMP(batch_size=batch_size)
        estimator.n_samples = n_samples
        computed.append(estimator._get_batch_size())

    for actual, (_, _, expected) in zip(computed, cases):
        assert actual == expected
Example #7
0
def test_l1_loss():
    """
    Check the evaluation of the l1 regularisation term on the weights of the neural net.

    A single weight tensor [2.0, 4.0] is passed to MRMP._l1_loss on an
    estimator created with l1_reg=0.1, and the evaluated value is compared
    with the expected result 0.6.

    :return: None
    """

    # Some example weights
    weights = [tf.constant([2.0, 4.0], dtype=tf.float32)]

    # Creating object with known l1_reg parameter
    obj = MRMP(l1_reg=0.1)
    expected_result = [0.6]

    # Evaluating l1 term; the context manager closes the session (and frees
    # its resources) even if the evaluation raises.
    l1_loss_tf = obj._l1_loss(weights=weights)
    with tf.Session() as sess:
        l1_loss = sess.run(l1_loss_tf)

    # Testing
    assert np.isclose(l1_loss, expected_result)
Example #8
0
def test_set_descriptor():
    """
    Check that MRMP.set_representations works as expected.

    A new estimator is expected to have ``representation`` set to None. After
    storing a valid descriptor the attribute must equal it element-wise, and
    passing the second descriptor (wrong shape) must raise InputError.
    """

    test_dir = os.path.dirname(os.path.realpath(__file__))

    data_correct = np.load(test_dir + "/data/CN_isopent_light_UCM.npz")
    data_incorrect = np.load(test_dir + "/data/local_slatm_ch4cn_light.npz")
    descriptor_correct = data_correct["arr_0"]
    descriptor_incorrect = data_incorrect["arr_0"]

    estimator = MRMP()

    # Identity check on purpose: `== None` against an array would compare
    # element-wise instead of testing for "not set".
    assert estimator.representation is None

    estimator.set_representations(representations=descriptor_correct)

    assert np.all(estimator.representation == descriptor_correct)

    # Pass a descriptor with the wrong shape: only the call under test sits in
    # the try body, and a missing InputError fails with an explicit message.
    try:
        estimator.set_representations(representations=descriptor_incorrect)
    except InputError:
        pass
    else:
        raise AssertionError(
            "set_representations did not raise InputError for a descriptor "
            "with the wrong shape")
Example #9
0
def test_save_local():
    """
    Check the saving and the loading of a trained model.

    The estimator is fitted on y = x**2, scored, saved to "saved_test_model",
    loaded back from the same directory and scored again; both scores must be
    equal. The directory is removed afterwards, whether or not the test passes.
    """

    x = np.linspace(-10.0, 10.0, 2000)
    y = x**2

    x = np.reshape(x, (x.shape[0], 1))

    estimator = MRMP()
    estimator.fit(x=x, y=y)

    score_after_training = estimator.score(x, y)

    try:
        estimator.save_nn(save_dir="saved_test_model")

        estimator.load_nn(save_dir="saved_test_model")
        score_after_loading = estimator.score(x, y)

        assert score_after_loading == score_after_training
    finally:
        # Always clean up so a failing run does not leave the directory behind;
        # ignore_errors keeps a missing directory from masking the real failure.
        shutil.rmtree("./saved_test_model", ignore_errors=True)
Example #10
0
def test_set_representation():
    """
    Check the method MRMP._set_representation through the constructor.

    Three constructor calls are expected to raise InputError; a fourth call
    with a full set of slatm parameters must succeed and expose the chosen
    representation name and parameters on the estimator.
    """
    # Keyword arguments that the constructor is expected to reject.
    # NOTE(review): the reasons given below are presumed from the arguments
    # themselves - confirm against MRMP._set_representation.
    invalid_configurations = [
        # slatm parameter combined with a non-slatm representation
        {'representation': 'unsorted_coulomb_matrix',
         'representation_params': {'slatm_sigma1': 0.05}},
        # representation name presumably not among the supported ones
        {'representation': 'coulomb_matrix'},
        # slatm_alchemy given as a float (the valid set below passes True)
        {'representation': 'slatm',
         'representation_params': {'slatm_alchemy': 0.05}},
    ]

    for kwargs in invalid_configurations:
        try:
            MRMP(**kwargs)
        except InputError:
            continue
        # Reached only when no InputError was raised: fail with the offending
        # arguments instead of a bare, message-less exception.
        raise AssertionError(
            "MRMP(**%r) did not raise InputError" % (kwargs,))

    parameters = {
        'slatm_sigma1': 0.07,
        'slatm_sigma2': 0.04,
        'slatm_dgrid1': 0.02,
        'slatm_dgrid2': 0.06,
        'slatm_rcut': 5.0,
        'slatm_rpower': 7,
        'slatm_alchemy': True
    }

    estimator = MRMP(representation='slatm', representation_params=parameters)

    assert estimator.representation_name == 'slatm'
    assert estimator.slatm_parameters == parameters
Example #11
0
def test_fit_2():
    """
    Check the second fitting workflow.

    Premade descriptors and the energies are stored in the estimator, then
    fit is called with the indices of the samples to train on.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    archive = np.load(here + "/data/CN_isopent_light_UCM.npz")

    # The archive holds the descriptor first and the energies second.
    premade_descriptor, targets = archive["arr_0"], archive["arr_1"]

    estimator = MRMP()
    estimator.set_representations(representations=premade_descriptor)
    estimator.set_properties(targets)

    sample_indices = np.arange(0, 100)
    estimator.fit(sample_indices)
Example #12
0
from qml.aglaia.aglaia import MRMP
import numpy as np
import os

## ------------- ** Loading the data ** ---------------

# 100 samples of the CN + isobutane data set: the unsorted CM representation
# is stored under "arr_0" and the matching energies under "arr_1".
current_dir = os.path.dirname(os.path.realpath(__file__))
data = np.load(current_dir + '/../test/data/CN_isopent_light_UCM.npz')
representation, energies = data["arr_0"], data["arr_1"]

## ------------- ** Setting up the estimator ** ---------------

# The data is stored inside the estimator so it can later be addressed by index.
estimator = MRMP(iterations=7000, l2_reg=0.0)
estimator.set_representations(representations=representation)
estimator.set_properties(energies)

##  ------------- ** Fitting to the data ** ---------------

# Indices of the stored samples to train on.
idx = np.arange(100)
estimator.fit(idx)

##  ------------- ** Predicting and scoring ** ---------------

# The sign of the score is flipped before it is reported as the error.
score = estimator.score(idx)
print("The mean absolute error is %s kJ/mol." % str(-score))
Example #13
0
def test_score():
    """
    Check that every scoring function can be used.

    For each scoring function name an estimator is created, fitted on the
    loaded data and scored on the same data; the score values themselves
    are not inspected.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    archive = np.load(here + "/data/CN_isopent_light_UCM.npz")
    descriptor, energies = archive["arr_0"], archive["arr_1"]

    for scoring in ('mae', 'r2', 'rmse'):
        estimator = MRMP(scoring_function=scoring)
        estimator.fit(descriptor, energies)
        estimator.score(descriptor, energies)
Example #14
0
import glob
from qml.aglaia.aglaia import MRMP
import numpy as np

# Keep at most the first 10000 xyz files returned by glob.
filenames = glob.glob("/Volumes/Transcend/data_sets/CN_isobutane_model/geoms_2/training/*.xyz")[:10000]

# Let the estimator build one compound per file.
estimator = MRMP()
estimator.generate_compounds(filenames)

# Gather coordinates and nuclear charges of every compound in a single pass.
xyz, zs = [], []
for compound in estimator.compounds:
    xyz.append(compound.coordinates)
    zs.append(compound.nuclear_charges)

xyz, zs = np.asarray(xyz), np.asarray(zs)
print(xyz.shape, zs.shape)

# Positional arrays are stored by np.savez under the keys arr_0 and arr_1.
np.savez("xyz_cnisopent.npz", xyz, zs)

Example #15
0
import os

## ------------- ** Loading the data ** ---------------

current_dir = os.path.dirname(os.path.realpath(__file__))

# xyz files in sorted order, plus column 1 of the property file as energies.
filenames = sorted(glob.glob(current_dir + '/../test/CN_isobutane/*.xyz'))
energies = np.loadtxt(
    current_dir + '/../test/CN_isobutane/prop_kjmol_training.txt',
    usecols=[1])

## ------------- ** Setting up the estimator ** ---------------

# slatm representation with both grid spacings set explicitly.
slatm_settings = {'slatm_dgrid2': 0.06, 'slatm_dgrid1': 0.06}
estimator = MRMP(representation='slatm', representation_params=slatm_settings)

# Only the first 100 files / energies are used.
estimator.generate_compounds(filenames[:100])
estimator.set_properties(energies[:100])
estimator.generate_representation()

##  ------------- ** Fitting to the data ** ---------------

# Indices of the stored samples to train on.
idx = np.arange(100)
estimator.fit(idx)

##  ------------- ** Predicting and scoring ** ---------------
Example #16
0
"""

from qml.aglaia.aglaia import MRMP
import numpy as np
import os

## ------------- ** Loading the data ** ---------------

# 100 samples of the CN + isobutane data set: the unsorted CM representation
# is stored under "arr_0" and the matching energies under "arr_1".
current_dir = os.path.dirname(os.path.realpath(__file__))
data = np.load(current_dir + '/../test/data/CN_isopent_light_UCM.npz')
representation, energies = data["arr_0"], data["arr_1"]

## ------------- ** Setting up the estimator ** ---------------

estimator = MRMP()

##  ------------- ** Fitting to the data ** ---------------

# The arrays are handed to fit directly instead of being stored first.
estimator.fit(representation, energies)

##  ------------- ** Predicting and scoring ** ---------------

# The sign of the score is flipped before it is reported as the error.
score = estimator.score(representation, energies)
print("The mean absolute error is %s kJ/mol." % str(-score))

energies_predict = estimator.predict(representation)