def test_fit_1():
    """
    Check the first fitting workflow: compounds are built from xyz files, the
    energies are stored in the estimator, and the fit method is called with
    the indices of the molecules to fit. The workflow is repeated for every
    representation name listed in the loop below.
    """
    here = os.path.dirname(os.path.realpath(__file__))

    xyz_files = glob.glob(here + "/CN_isobutane/*.xyz")
    energies = np.loadtxt(here + '/CN_isobutane/prop_kjmol_training.txt', usecols=[1])
    # Sorted so that the file order is reproducible before slicing.
    xyz_files = sorted(xyz_files)

    n_samples = 100

    for rep in ('sorted_coulomb_matrix', 'unsorted_coulomb_matrix', 'bag_of_bonds', 'slatm'):
        estimator = MRMP(representation=rep)
        estimator.generate_compounds(xyz_files[:n_samples])
        estimator.set_properties(energies[:n_samples])
        estimator.generate_representation()

        # Fit on the indices of the stored samples rather than on the data itself.
        estimator.fit(np.arange(0, n_samples))
def test_fit_3():
    """
    Check the third fitting workflow: the representations and the energies
    are handed straight to the fit method.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    archive = np.load(here + "/data/CN_isopent_light_UCM.npz")

    # "arr_0" is passed as the first fit argument, "arr_1" (the energies) as the second.
    representations, energies = archive["arr_0"], archive["arr_1"]

    MRMP().fit(representations, energies)
def test_set_properties():
    """
    This test checks that the MRMP.set_properties method works.

    A freshly constructed estimator must have no properties; after calling
    set_properties, the stored properties must equal the energies passed in.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    energies = np.loadtxt(test_dir + '/CN_isobutane/prop_kjmol_training.txt',
                          usecols=[1])

    estimator = MRMP(representation='unsorted_coulomb_matrix')

    # Identity check: "== None" would be evaluated element-wise (making the
    # assert ambiguous) if properties ever held an array.
    assert estimator.properties is None

    estimator.set_properties(energies)

    assert np.all(estimator.properties == energies)
def test_load_external():
    """
    Check that a model trained and saved on another machine can be loaded on
    this one and reproduces the score that was obtained there.
    """
    grid = np.linspace(-10.0, 10.0, 2000)
    # Targets are computed from the flat grid, before it is turned into a column.
    targets = grid**2
    features = grid.reshape((grid.shape[0], 1))

    estimator = MRMP()
    estimator.load_nn("saved_model")

    # Score value recorded on the other machine.
    reference_score = -24.101043

    assert np.isclose(estimator.score(features, targets), reference_score)
def test_fit1():
    """Check that the neural net can overfit a cubic function."""
    train_x = np.linspace(-2.0, 2.0, 200)
    train_y = train_x ** 3

    estimator = MRMP(hidden_layer_sizes=(5, 5, 5), learning_rate=0.01, iterations=35000)
    # The estimator is given the inputs as a single-column 2D array.
    estimator.fit(train_x.reshape((len(train_x), 1)), train_y)

    test_x = np.linspace(-1.5, 1.5, 15)
    expected = test_x ** 3
    predicted = estimator.predict(test_x.reshape((len(test_x), 1)))

    # Flatten the predictions so they can be compared with the 1D targets.
    predicted_flat = np.reshape(predicted, (predicted.shape[0],))
    np.testing.assert_array_almost_equal(expected, predicted_flat, decimal=1)
def test_get_batch_size():
    """
    Check MRMP._get_batch_size for three combinations of sample count and
    batch_size setting.

    :return: None
    """
    # (number of samples, batch_size setting, expected batch size)
    cases = [
        (200, "auto", 100),
        (50, 100, 50),
        (50, 20, 17),
    ]

    # First evaluate every case ...
    computed = []
    for n_samples, batch_setting, _ in cases:
        obj = MRMP(batch_size=batch_setting)
        obj.n_samples = n_samples
        computed.append(obj._get_batch_size())

    # ... then compare each result with its expected value.
    for (_, _, expected), actual in zip(cases, computed):
        assert actual == expected
def test_l1_loss():
    """
    This tests the evaluation of the l1 regularisation term on the weights of the neural net.

    :return: None
    """
    # Some example weights
    weights = [tf.constant([2.0, 4.0], dtype=tf.float32)]

    # Creating object with known l1_reg parameter
    obj = MRMP(l1_reg=0.1)
    # 0.6 equals l1_reg * sum(|w|) = 0.1 * (2.0 + 4.0) for the weights above.
    expected_result = [0.6]

    # Evaluating l1 term
    l1_loss_tf = obj._l1_loss(weights=weights)

    # The context manager closes the session, releasing its resources, even
    # if run() raises; the original left the session open.
    with tf.Session() as sess:
        l1_loss = sess.run(l1_loss_tf)

    # Testing
    assert np.isclose(l1_loss, expected_result)
def test_set_descriptor():
    """
    This test checks that the set_representations function works as expected.

    A fresh estimator must have no representation; a valid representation must
    be stored unchanged, and a representation with the wrong shape must be
    rejected with an InputError.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    data_correct = np.load(test_dir + "/data/CN_isopent_light_UCM.npz")
    data_incorrect = np.load(test_dir + "/data/local_slatm_ch4cn_light.npz")
    descriptor_correct = data_correct["arr_0"]
    descriptor_incorrect = data_incorrect["arr_0"]

    estimator = MRMP()

    # Identity check: "== None" would be evaluated element-wise if the
    # attribute ever held an array.
    assert estimator.representation is None

    estimator.set_representations(representations=descriptor_correct)

    assert np.all(estimator.representation == descriptor_correct)

    # Pass a descriptor with the wrong shape
    try:
        estimator.set_representations(representations=descriptor_incorrect)
    except InputError:
        pass
    else:
        # Only reached when no InputError was raised, i.e. the bad input was accepted.
        raise AssertionError(
            "set_representations accepted a representation with the wrong shape")
def test_save_local():
    """
    This function tests the saving and the loading of a trained model.

    The model is fitted, scored, saved to "saved_test_model", loaded back and
    scored again; both scores must be identical. The saved directory is
    removed afterwards whether or not the test passes.
    """
    x = np.linspace(-10.0, 10.0, 2000)
    y = x**2

    x = np.reshape(x, (x.shape[0], 1))

    estimator = MRMP()
    estimator.fit(x=x, y=y)

    score_after_training = estimator.score(x, y)

    try:
        estimator.save_nn(save_dir="saved_test_model")
        estimator.load_nn(save_dir="saved_test_model")

        score_after_loading = estimator.score(x, y)

        assert score_after_loading == score_after_training
    finally:
        # Clean up even when saving, loading or the assertion fails, so a
        # failed run does not leave the directory behind. ignore_errors keeps
        # a missing directory from masking the original failure.
        shutil.rmtree("./saved_test_model", ignore_errors=True)
def test_set_representation():
    """
    This function tests the representation handling performed when an MRMP
    estimator is constructed (the method MRMP._set_representation).

    Three invalid configurations must raise InputError, and a valid slatm
    configuration must be stored on the estimator as representation_name and
    slatm_parameters.
    """
    invalid_configurations = [
        # slatm parameter combined with a coulomb matrix representation
        {'representation': 'unsorted_coulomb_matrix',
         'representation_params': {'slatm_sigma1': 0.05}},
        # presumably rejected as an unknown representation name
        {'representation': 'coulomb_matrix'},
        # presumably rejected because slatm_alchemy is not a boolean here
        {'representation': 'slatm',
         'representation_params': {'slatm_alchemy': 0.05}},
    ]

    for kwargs in invalid_configurations:
        try:
            MRMP(**kwargs)
        except InputError:
            pass
        else:
            # Only reached when the constructor raised nothing at all.
            raise AssertionError(
                "MRMP accepted an invalid configuration: %r" % (kwargs,))

    parameters = {
        'slatm_sigma1': 0.07,
        'slatm_sigma2': 0.04,
        'slatm_dgrid1': 0.02,
        'slatm_dgrid2': 0.06,
        'slatm_rcut': 5.0,
        'slatm_rpower': 7,
        'slatm_alchemy': True,
    }

    estimator = MRMP(representation='slatm', representation_params=parameters)

    assert estimator.representation_name == 'slatm'
    assert estimator.slatm_parameters == parameters
def test_fit_2():
    """
    Check the second fitting workflow: premade representations are stored in
    the estimator together with the energies, and the fit method is called
    with the indices of the molecules to fit.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    archive = np.load(here + "/data/CN_isopent_light_UCM.npz")
    representations, energies = archive["arr_0"], archive["arr_1"]

    estimator = MRMP()
    estimator.set_representations(representations=representations)
    estimator.set_properties(energies)

    # Fit on the indices 0..99 of the stored samples.
    estimator.fit(np.arange(0, 100))
from qml.aglaia.aglaia import MRMP
import numpy as np
import os

## ------------- ** Loading the data ** ---------------

# The data loaded contains 100 samples of the CN + isobutane data set in unsorted CM representation
script_dir = os.path.dirname(os.path.realpath(__file__))
archive = np.load(script_dir + '/../test/data/CN_isopent_light_UCM.npz')

representation, energies = archive["arr_0"], archive["arr_1"]

## ------------- ** Setting up the estimator ** ---------------

# The representations and energies are stored inside the estimator.
estimator = MRMP(iterations=7000, l2_reg=0.0)
estimator.set_representations(representations=representation)
estimator.set_properties(energies)

## ------------- ** Fitting to the data ** ---------------

# Fitting is done on sample indices, since the data already lives in the estimator.
idx = np.arange(0, 100)
estimator.fit(idx)

## ------------- ** Predicting and scoring ** ---------------

score = estimator.score(idx)

# The sign of the score is flipped before it is reported as an error.
mean_absolute_error = str(-score)
print("The mean absolute error is %s kJ/mol." % (mean_absolute_error))
def test_score():
    """
    Check that an estimator can be fitted and scored with each of the
    scoring functions 'mae', 'r2' and 'rmse'.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    archive = np.load(here + "/data/CN_isopent_light_UCM.npz")
    descriptor, energies = archive["arr_0"], archive["arr_1"]

    # The test only checks that fitting and scoring run; the score value is not inspected.
    for scoring in ('mae', 'r2', 'rmse'):
        estimator = MRMP(scoring_function=scoring)
        estimator.fit(descriptor, energies)
        estimator.score(descriptor, energies)
import glob
from qml.aglaia.aglaia import MRMP
import numpy as np

# Keep at most the first 10000 xyz files returned by glob.
filenames = glob.glob("/Volumes/Transcend/data_sets/CN_isobutane_model/geoms_2/training/*.xyz")[:10000]

# The estimator is only used to turn the xyz files into compounds.
estimator = MRMP()
estimator.generate_compounds(filenames)

# Collect the coordinates and nuclear charges of every compound.
xyz = np.asarray([compound.coordinates for compound in estimator.compounds])
zs = np.asarray([compound.nuclear_charges for compound in estimator.compounds])

print(xyz.shape, zs.shape)

# Stored positionally, i.e. as "arr_0" (coordinates) and "arr_1" (nuclear charges).
np.savez("xyz_cnisopent.npz", xyz, zs)
import os

## ------------- ** Loading the data ** ---------------

current_dir = os.path.dirname(os.path.realpath(__file__))
filenames = glob.glob(current_dir + '/../test/CN_isobutane/*.xyz')
energies = np.loadtxt(current_dir + '/../test/CN_isobutane/prop_kjmol_training.txt', usecols=[1])
# Sorted so that the file order is reproducible before slicing.
filenames = sorted(filenames)

## ------------- ** Setting up the estimator ** ---------------

slatm_params = {
    'slatm_dgrid2': 0.06,
    'slatm_dgrid1': 0.06,
}
estimator = MRMP(representation='slatm', representation_params=slatm_params)

# Only the first 100 files and energies are used.
estimator.generate_compounds(filenames[:100])
estimator.set_properties(energies[:100])

estimator.generate_representation()

## ------------- ** Fitting to the data ** ---------------

# Fitting is done on the indices of the samples stored in the estimator.
idx = np.arange(0, 100)
estimator.fit(idx)

## ------------- ** Predicting and scoring ** ---------------
""" from qml.aglaia.aglaia import MRMP import numpy as np import os ## ------------- ** Loading the data ** --------------- # The data loaded contains 100 samples of the CN + isobutane data set in unsorted CM representation current_dir = os.path.dirname(os.path.realpath(__file__)) data = np.load(current_dir + '/../test/data/CN_isopent_light_UCM.npz') representation = data["arr_0"] energies = data["arr_1"] ## ------------- ** Setting up the estimator ** --------------- estimator = MRMP() ## ------------- ** Fitting to the data ** --------------- estimator.fit(representation, energies) ## ------------- ** Predicting and scoring ** --------------- score = estimator.score(representation, energies) print("The mean absolute error is %s kJ/mol." % (str(-score))) energies_predict = estimator.predict(representation)