Esempio n. 1
0
def test_fit_1():
    """
    Exercise the first fitting workflow for every listed representation.

    For each representation name, compounds are generated from the first 100
    xyz files (in sorted filename order), the first 100 energies are stored in
    the estimator, the representation is generated, and ``fit`` is called with
    the indices of the samples to train on.
    """
    here = os.path.dirname(os.path.realpath(__file__))

    # glob gives no ordering guarantee, so sort before slicing.
    xyz_files = sorted(glob.glob(here + "/CN_isobutane/*.xyz"))
    energies = np.loadtxt(
        here + '/CN_isobutane/prop_kjmol_training.txt', usecols=[1])

    representation_names = (
        'sorted_coulomb_matrix',
        'unsorted_coulomb_matrix',
        'bag_of_bonds',
        'slatm',
    )

    for rep_name in representation_names:
        estimator = MRMP(representation=rep_name)
        estimator.generate_compounds(xyz_files[:100])
        estimator.set_properties(energies[:100])
        estimator.generate_representation()

        # Fit on the samples selected by index.
        estimator.fit(np.arange(0, 100))
Esempio n. 2
0
def test_fit_3():
    """
    Exercise the third fitting workflow.

    The descriptor array and the energies are loaded from the npz archive and
    handed straight to ``fit`` as arguments.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    archive = np.load(here + "/data/CN_isopent_light_UCM.npz")

    features = archive["arr_0"]
    targets = archive["arr_1"]

    MRMP().fit(features, targets)
Esempio n. 3
0
def test_fit1():
    """Check that the neural net can overfit a cubic function of one input."""
    # Training set: 200 points of y = x^3 on [-2, 2], inputs as a column.
    train_x = np.linspace(-2.0, 2.0, 200)
    train_y = train_x ** 3

    estimator = MRMP(hidden_layer_sizes=(5, 5, 5), learning_rate=0.01, iterations=35000)
    estimator.fit(train_x.reshape(len(train_x), 1), train_y)

    # Evaluation points lie inside the training interval.
    eval_x = np.linspace(-1.5, 1.5, 15)
    expected = eval_x ** 3
    predicted = estimator.predict(eval_x.reshape(len(eval_x), 1))

    # Flatten the prediction to 1-D so it compares against `expected`.
    predicted_flat = np.reshape(predicted, (predicted.shape[0],))
    np.testing.assert_array_almost_equal(expected, predicted_flat, decimal=1)
Esempio n. 4
0
def test_fit_2():
    """
    Exercise the second fitting workflow.

    Premade descriptors and their energies are stored in the estimator first;
    ``fit`` is then called with the indices of the samples to train on.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    archive = np.load(here + "/data/CN_isopent_light_UCM.npz")

    features = archive["arr_0"]
    targets = archive["arr_1"]

    estimator = MRMP()
    estimator.set_representations(representations=features)
    estimator.set_properties(targets)

    # Fit on the samples selected by index.
    estimator.fit(np.arange(0, 100))
Esempio n. 5
0
def test_save_local():
    """
    Test saving and loading of a trained model.

    A model is fitted to y = x^2, scored, saved to ``saved_test_model``,
    loaded back, and scored again; the two scores must be equal.

    The saved model directory is removed in a ``finally`` clause so that a
    failing assertion (or an error in save/load) does not leave it behind in
    the working directory.
    """

    x = np.linspace(-10.0, 10.0, 2000)
    y = x**2

    # The estimator is given the inputs as a column, one sample per row.
    x = np.reshape(x, (x.shape[0], 1))

    estimator = MRMP()
    estimator.fit(x=x, y=y)

    score_after_training = estimator.score(x, y)

    try:
        estimator.save_nn(save_dir="saved_test_model")

        estimator.load_nn(save_dir="saved_test_model")
        score_after_loading = estimator.score(x, y)

        assert score_after_loading == score_after_training
    finally:
        # ignore_errors: if saving failed before the directory was created,
        # the cleanup must not mask the original exception.
        shutil.rmtree("./saved_test_model", ignore_errors=True)
Esempio n. 6
0
def test_score():
    """
    Check that every scoring function can be used.

    For each scoring function name, a fresh estimator is fitted on the loaded
    descriptors and energies and then scored on the same data.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    archive = np.load(here + "/data/CN_isopent_light_UCM.npz")

    features = archive["arr_0"]
    targets = archive["arr_1"]

    for scoring_name in ('mae', 'r2', 'rmse'):
        estimator = MRMP(scoring_function=scoring_name)
        estimator.fit(features, targets)
        estimator.score(features, targets)
Esempio n. 7
0
## ------------- ** Data loading ** ---------------

# The archive holds 100 samples of the CN + isobutane data set in unsorted CM representation
current_dir = os.path.dirname(os.path.realpath(__file__))
data = np.load(current_dir + '/../test/data/CN_isopent_light_UCM.npz')

representation, energies = data["arr_0"], data["arr_1"]

## ------------- ** Estimator set-up ** ---------------

# The representations and properties are stored inside the estimator.
estimator = MRMP(iterations=7000, l2_reg=0.0)
estimator.set_representations(representations=representation)
estimator.set_properties(energies)

##  ------------- ** Fitting ** ---------------

# Fit on the stored samples selected by index.
idx = np.arange(100)
estimator.fit(idx)

##  ------------- ** Scoring and prediction ** ---------------

score = estimator.score(idx)

# NOTE(review): the score is negated before printing; presumably score()
# returns the negative mean absolute error - confirm against MRMP.
print("The mean absolute error is %s kJ/mol." % str(-score))

energies_predict = estimator.predict(idx)
Esempio n. 8
0
"""

from qml.aglaia.aglaia import MRMP
import numpy as np
import os

## ------------- ** Data loading ** ---------------

# The archive holds 100 samples of the CN + isobutane data set in unsorted CM representation
current_dir = os.path.dirname(os.path.realpath(__file__))
data = np.load(current_dir + '/../test/data/CN_isopent_light_UCM.npz')

representation, energies = data["arr_0"], data["arr_1"]

## ------------- ** Estimator set-up ** ---------------

estimator = MRMP()

##  ------------- ** Fitting ** ---------------

# The representations and energies are passed directly to fit().
estimator.fit(representation, energies)

##  ------------- ** Scoring and prediction ** ---------------

score = estimator.score(representation, energies)

# NOTE(review): the score is negated before printing; presumably score()
# returns the negative mean absolute error - confirm against MRMP.
print("The mean absolute error is %s kJ/mol." % str(-score))

energies_predict = estimator.predict(representation)