Esempio n. 1
0
def test_fit_3():
    """
    Check the third way of fitting: the representations, classes and properties are passed directly to ``fit``.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    archive = np.load(here + "/data/local_slatm_ch4cn_light.npz")

    # The three arrays of the archive are used as x, classes and y, in that order.
    representations, atom_classes, targets = (archive[key] for key in ("arr_0", "arr_1", "arr_2"))

    model = ARMP()
    model.fit(x=representations, y=targets, classes=atom_classes)
Esempio n. 2
0
def test_predict_3():
    """
    Check that ``predict`` returns an array with the same shape as the properties used for fitting, when the data
    is passed directly to ``fit`` and ``predict``.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    archive = np.load(here + "/data/local_slatm_ch4cn_light.npz")

    # The three arrays of the archive are used as x, classes and y, in that order.
    representations, atom_classes, targets = (archive[key] for key in ("arr_0", "arr_1", "arr_2"))

    model = ARMP()
    model.fit(x=representations, y=targets, classes=atom_classes)
    predicted = model.predict(x=representations, classes=atom_classes)

    assert targets.shape == predicted.shape
Esempio n. 3
0
def test_set_properties():
    """
    Check that ``set_properties`` stores the properties it is given.

    The estimator must have no properties right after construction and must hold the loaded energies once
    ``set_properties`` has been called.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    energies = np.loadtxt(test_dir + '/CN_isobutane/prop_kjmol_training.txt',
                          usecols=[1])

    estimator = ARMP(representation='slatm')

    # Identity test: ``== None`` on an array-valued attribute would be evaluated element-wise instead.
    assert estimator.properties is None

    estimator.set_properties(energies)

    assert np.all(estimator.properties == energies)
Esempio n. 4
0
def test_fit_1():
    """
    Check the first way of fitting: compounds are generated from xyz files, the representation is generated from
    the compounds, and ``fit`` is called with the indices of the samples to use.
    """
    here = os.path.dirname(os.path.realpath(__file__))

    xyz_files = sorted(glob.glob(here + "/CN_isobutane/*.xyz"))
    targets = np.loadtxt(here + '/CN_isobutane/prop_kjmol_training.txt', usecols=[1])

    # Only the first 50 files (in sorted order) and the first 50 properties are used.
    n_used = 50

    model = ARMP(representation="acsf")
    model.generate_compounds(xyz_files[:n_used])
    model.set_properties(targets[:n_used])
    model.generate_representation()

    model.fit(np.arange(0, n_used))
Esempio n. 5
0
def test_fit_2():
    """
    Check the second way of fitting: the representations, classes and properties are stored in the estimator with
    the setter functions, and ``fit`` is called with the indices of the samples to use.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    archive = np.load(here + "/data/local_slatm_ch4cn_light.npz")

    # The three arrays of the archive are the representations, the classes and the properties, in that order.
    representations, atom_classes, targets = (archive[key] for key in ("arr_0", "arr_1", "arr_2"))

    model = ARMP()
    model.set_representations(representations=representations)
    model.set_classes(classes=atom_classes)
    model.set_properties(targets)

    model.fit(np.arange(0, 100))
Esempio n. 6
0
def test_set_descriptor():
    """
    Check that ``set_representations`` stores a valid representation and rejects an invalid one.

    The estimator must start without a representation, hold exactly the array it was given after the call, and raise
    ``InputError`` for the array loaded from CN_isopent_light_UCM.npz.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    data_incorrect = np.load(test_dir + "/data/CN_isopent_light_UCM.npz")
    data_correct = np.load(test_dir + "/data/local_slatm_ch4cn_light.npz")
    descriptor_correct = data_correct["arr_0"]
    descriptor_incorrect = data_incorrect["arr_0"]

    estimator = ARMP()

    # Identity test: ``== None`` on an array-valued attribute would be evaluated element-wise instead.
    assert estimator.representation is None

    estimator.set_representations(representations=descriptor_correct)

    assert np.all(estimator.representation == descriptor_correct)

    # Pass a descriptor with the wrong shape. Only the call under test sits in the ``try`` body, so the failure
    # raised in the ``else`` branch can never be mistaken for the expected exception.
    try:
        estimator.set_representations(representations=descriptor_incorrect)
    except InputError:
        pass
    else:
        raise AssertionError("set_representations accepted a representation with the wrong shape")
Esempio n. 7
0
def test_set_representation():
    """
    Check how the ``ARMP`` constructor handles the representation arguments.

    Three invalid argument combinations must raise ``InputError``; a complete set of slatm parameters must be
    stored unchanged on the estimator together with the representation name.
    """
    # NOTE(review): presumably these are, in order, an unknown slatm parameter name, a representation that ARMP does
    # not support, and a slatm parameter with a value of the wrong type - confirm against the ARMP input checks.
    invalid_arguments = [
        {'representation': 'slatm', 'representation_params': {'slatm_sigma12': 0.05}},
        {'representation': 'coulomb_matrix'},
        {'representation': 'slatm', 'representation_params': {'slatm_alchemy': 0.05}},
    ]

    for kwargs in invalid_arguments:
        # Only the constructor call sits in the ``try`` body; the ``else`` branch reports which arguments were
        # wrongly accepted.
        try:
            ARMP(**kwargs)
        except InputError:
            pass
        else:
            raise AssertionError("ARMP accepted invalid arguments: %r" % (kwargs,))

    parameters = {
        'slatm_sigma1': 0.07,
        'slatm_sigma2': 0.04,
        'slatm_dgrid1': 0.02,
        'slatm_dgrid2': 0.06,
        'slatm_rcut': 5.0,
        'slatm_rpower': 7,
        'slatm_alchemy': True
    }

    estimator = ARMP(representation='slatm', representation_params=parameters)

    assert estimator.representation_name == 'slatm'
    assert estimator.slatm_parameters == parameters
Esempio n. 8
0
# Take the "zs" entries of the last n_samples samples as 32-bit integers
zs = np.array(data["zs"][-n_samples:], dtype=np.int32)

# Creating the estimator with an explicit set of ACSF representation parameters
acsf_params = {
    "nRs2": 5,
    "nRs3": 5,
    "nTs": 5,
    "rcut": 5,
    "acut": 5,
    "zeta": 220.127,
    "eta": 30.8065
}
estimator = ARMP(iterations=200,
                 representation_name='acsf',
                 representation_params=acsf_params,
                 tensorboard=True,
                 store_frequency=10,
                 l1_reg=0.0001,
                 l2_reg=0.005,
                 learning_rate=0.0005)

# Hand the properties to the estimator and generate the representation from xyz and zs
estimator.set_properties(ene)
estimator.generate_representation(xyz, zs)

saved_dir = "saved_model"

# NOTE(review): presumably restores a network previously written to saved_dir - confirm against ARMP.load_nn
estimator.load_nn(saved_dir)

# Fit is called after load_nn, using every sample index
idx = list(range(n_samples))

estimator.fit(idx)
Esempio n. 9
0
def test_score_3():
    """
    Check that fitting and scoring run with each of the 'mae', 'r2' and 'rmse' scoring functions.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    archive = np.load(here + "/data/local_slatm_ch4cn_light.npz")

    # The three arrays of the archive are used as x, classes and y, in that order.
    representations, atom_classes, targets = (archive[key] for key in ("arr_0", "arr_1", "arr_2"))

    # A fresh estimator is built, fitted and scored on the same data for every scoring function.
    for scoring_name in ('mae', 'r2', 'rmse'):
        model = ARMP(scoring_function=scoring_name)
        model.fit(x=representations, y=targets, classes=atom_classes)
        model.score(x=representations, y=targets, classes=atom_classes)
Esempio n. 10
0
import shutil
from qml.aglaia.aglaia import ARMP
import tensorflow as tf

# Small hand-written data set: xyz is a (3, 3, 3) array, zs a (3, 3) array and ene_true holds 3 values
xyz = np.array([[[0, 1, 0], [0, 1, 1], [1, 0, 1]],
                    [[1, 2, 2], [3, 1, 2], [1, 3, 4]],
                    [[4, 1, 2], [0.5, 5, 6], [-1, 2, 3]]])
zs = np.array([[1, 2, 3],
               [1, 2, 3],
               [1, 2, 3]])

ene_true = np.array([0.5, 0.9, 1.0])

# ACSF estimator with explicitly chosen radial_rs, angular_rs and theta_s grids
estimator = ARMP(iterations=10, l1_reg=0.0001, l2_reg=0.005, learning_rate=0.0005, representation='acsf',
                 representation_params={"radial_rs": np.arange(0, 10, 5), "angular_rs": np.arange(0, 10, 5),
                                        "theta_s": np.arange(0, 3.14, 3)},
                 tensorboard=True, store_frequency=10
                 )

# Hand the properties to the estimator and generate the representation from xyz and zs
estimator.set_properties(ene_true)
estimator.generate_representation(xyz, zs)

# One index per sample (first axis of xyz)
idx = list(range(xyz.shape[0]))

# Fit on all samples, then save the network into the "temp" directory
estimator.fit(idx)
estimator.save_nn(save_dir="temp")

# Predictions of the fitted estimator for the same samples
pred1 = estimator.predict(idx)

# NOTE(review): the loaded_model attribute is set by hand here; its effect is not visible in this excerpt
estimator.loaded_model = True
Esempio n. 11
0
## ------------- ** Setting up the estimator ** ---------------

# Parameters passed to the estimator as representation_params for the 'acsf' representation
acsf_params = {
    "nRs2": 5,
    "nRs3": 5,
    "nTs": 5,
    "rcut": 5,
    "acut": 5,
    "zeta": 220.127,
    "eta": 30.8065
}
estimator = ARMP(iterations=5000,
                 representation_name='acsf',
                 representation_params=acsf_params,
                 tensorboard=True,
                 learning_rate=0.075,
                 l1_reg=0.0,
                 l2_reg=0.0)

# Build the compounds from the files and attach the matching properties
estimator.generate_compounds(filenames)
estimator.set_properties(energies)

# Generate the representation with method="fortran" and report its shape
estimator.generate_representation(method="fortran")
print("The shape of the representation is: %s" %
      (str(estimator.representation.shape)))

##  ------------- ** Fitting to the data ** ---------------

# Indices 0..99, split into train and test indices below
idx = np.arange(0, 100)
idx_train, idx_test = modsel.train_test_split(idx,
Esempio n. 12
0
from qml.aglaia.aglaia import ARMP
import numpy as np
import os

## ------------- ** Loading the data ** ---------------

# The archive is located relative to the directory containing this script
current_dir = os.path.dirname(os.path.realpath(__file__))
data = np.load(current_dir + '/../test/data/local_slatm_ch4cn_light.npz')

# arr_0, arr_1 and arr_2 are used as representations, classes and properties respectively
descriptor = data["arr_0"]
zs = data["arr_1"]
energies = data["arr_2"]

## ------------- ** Setting up the estimator ** ---------------

estimator = ARMP(iterations=100, l2_reg=0.0)

# Store the data in the estimator so that fit and score can be called with indices only
estimator.set_representations(representations=descriptor)
estimator.set_classes(zs)
estimator.set_properties(energies)

##  ------------- ** Fitting to the data ** ---------------

# Indices 0..99 of the stored samples
idx = np.arange(0, 100)

estimator.fit(idx)

##  ------------- ** Predicting and scoring ** ---------------

# Score on the same indices that were used for fitting
score = estimator.score(idx)
Esempio n. 13
0
# Report the value of n_samples (defined earlier in the script)
print("%i files were loaded." % (n_samples))

# Parameters passed to the estimator as representation_params for the 'acsf' representation
acsf_params = {
    "nRs2": 5,
    "nRs3": 5,
    "nTs": 5,
    "rcut": 5,
    "acut": 5,
    "zeta": 220.127,
    "eta": 30.8065
}
estimator = ARMP(iterations=6000,
                 representation_name='acsf',
                 representation_params=acsf_params,
                 l1_reg=0.0,
                 l2_reg=0.0,
                 scoring_function="rmse",
                 tensorboard=False,
                 store_frequency=10,
                 learning_rate=0.075)

# Only the first 100 properties and the first 100 files are used
estimator.set_properties(energies[:100])
estimator.generate_compounds(filenames[:100])
# Generate the representation with method="tf" and print its shape
estimator.generate_representation(method="tf")
print(estimator.representation.shape)

# One index per sample that was handed to the estimator
idx = list(range(100))

idx_train, idx_test = modsel.train_test_split(idx,
                                              test_size=0,
                                              random_state=42,
# Creating the estimator
# Parameters passed to the estimator as representation_params for the 'acsf' representation
acsf_params = {
    "nRs2": 14,
    "nRs3": 14,
    "nTs": 14,
    "rcut": 3.29,
    "acut": 3.29,
    "zeta": 100.06564927139748,
    "eta": 39.81824764370754
}
estimator = ARMP(iterations=2633,
                 batch_size=22,
                 l1_reg=1.46e-05,
                 l2_reg=0.0001,
                 learning_rate=0.0013,
                 representation_name='acsf',
                 representation_params=acsf_params,
                 tensorboard=True,
                 store_frequency=25,
                 hidden_layer_sizes=(185, ))

# Loading the model previously trained
# (the path is relative to the working directory the script is started from)
estimator.load_nn("../trained_nn/vr-nn")
estimator.set_properties(ene_surface)

# Generating the representation and measuring the wall-clock time it takes
start = time.time()
estimator.generate_representation(xyz_surface, zs_surface, method="fortran")
end = time.time()
print("The time taken to generate the representations is %s s" %
      (str(end - start)))
Esempio n. 15
0
    ],
    'radial_cutoff':
    5,
    'angular_cutoff':
    5,
    'zeta':
    17.8630,
    'eta':
    2.5148
}

# Generate estimator
estimator = ARMP(iterations=1,
                 l1_reg=0.0001,
                 l2_reg=0.005,
                 learning_rate=0.0005,
                 representation_name='acsf',
                 representation_params=acsf_params,
                 tensorboard=False,
                 store_frequency=10)
# load_nn is called without a directory, so whatever default ARMP.load_nn uses applies
estimator.load_nn()

# NOTE(review): absolute path to an external volume - the script only runs on a machine where it is mounted
data_squal = h5py.File(
    "/Volumes/Transcend/data_sets/CN_squalane/dft/squalane_cn_dft.hdf5", "r")

# Only the first 10 entries of each data set are read
xyz_squal = np.array(data_squal.get("xyz")[:10])
zs_squal = np.array(data_squal.get("zs")[:10], dtype=np.int32)
# NOTE(review): 2625.50 is presumably the Hartree -> kJ/mol conversion factor - confirm the units of "ene"
ene_squal = np.array(data_squal.get("ene")[:10]) * 2625.50

# Predict directly from xyz and zs, without storing a representation in the estimator first
pred1 = estimator.predict_from_xyz(xyz_squal, zs_squal)

print(pred1)
Esempio n. 16
0
from qml.aglaia.aglaia import ARMP
import glob
import numpy as np
import os

## ------------- ** Loading the data ** ---------------

# The xyz files and the property file are located relative to the directory containing this script
current_dir = os.path.dirname(os.path.realpath(__file__))
filenames = glob.glob(current_dir + '/../test/CN_isobutane/*.xyz')
# Only column 1 of the property file is read
energies = np.loadtxt(current_dir + '/../test/CN_isobutane/prop_kjmol_training.txt', usecols=[1])
# glob returns the files in arbitrary order, so they are sorted in place
filenames.sort()

## ------------- ** Setting up the estimator ** ---------------

# ACSF estimator with explicitly chosen radial_rs, angular_rs and theta_s grids
estimator = ARMP(iterations=10, representation='acsf', representation_params={"radial_rs": np.arange(0, 10, 1), "angular_rs": np.arange(0.5, 10.5, 1),
"theta_s": np.arange(0, 5, 1)}, tensorboard=False)

# Build the compounds from the files and attach the properties
estimator.generate_compounds(filenames)
estimator.set_properties(energies)

estimator.generate_representation()

##  ------------- ** Fitting to the data ** ---------------

# Indices 0..99 of the stored samples
idx = np.arange(0,100)

estimator.fit(idx)


##  ------------- ** Predicting and scoring ** ---------------
Esempio n. 17
0
## ------------- ** Loading the data ** ---------------

# NOTE(review): current_dir is computed but not used below; the archive is loaded from an absolute path instead,
# so the script only runs on a machine where that path exists
current_dir = os.path.dirname(os.path.realpath(__file__))
data = np.load(
    "/Volumes/Transcend/repositories/my_qml_fork/qml/test/data/local_slatm_ch4cn_light.npz"
)

# arr_0, arr_1 and arr_2 are used as representations, classes and properties respectively
descriptor = data["arr_0"]
zs = data["arr_1"]
energies = data["arr_2"]

## ------------- ** Setting up the estimator ** ---------------

estimator = ARMP(iterations=3000,
                 learning_rate=0.075,
                 l1_reg=0.0,
                 l2_reg=0.0,
                 tensorboard=True,
                 store_frequency=50)

# Store the data in the estimator so that fit can be called with indices only
estimator.set_representations(representations=descriptor)
estimator.set_classes(zs)
estimator.set_properties(energies)

##  ------------- ** Fitting to the data ** ---------------

# Indices 0..99 of the stored samples
idx = np.arange(0, 100)

estimator.fit(idx)

##  ------------- ** Predicting and scoring ** ---------------
Esempio n. 18
0
from sklearn import model_selection as modsel
import tensorflow as tf

# Getting the dataset
# NOTE(review): absolute path to an external volume - the script only runs on a machine where it is mounted
data = h5py.File("/Volumes/Transcend/data_sets/CN_isopentane/pruned_dft_with_forces/pruned_isopentane_cn_dft.hdf5", "r")

n_samples = 500

# Only the last n_samples entries of each data set are used
xyz = np.array(data.get("xyz")[-n_samples:])
# NOTE(review): 2625.50 is presumably the Hartree -> kJ/mol conversion factor - confirm the units of "ene"
ene = np.array(data.get("ene")[-n_samples:])*2625.50
# Shift the energies so that they are relative to the first entry of the full data set
ene = ene - data.get("ene")[0]*2625.50
zs = np.array(data["zs"][-n_samples:], dtype=np.int32)

# Creating the estimator
acsf_param = {"nRs2": 5, "nRs3": 5, "nTs": 5, "rcut": 5, "acut": 5, "zeta": 220.127, "eta": 30.8065}
estimator = ARMP(iterations=1000, batch_size=512, l1_reg=0.0, l2_reg=0.0, learning_rate=0.001, representation_name='acsf',
                 representation_params=acsf_param, tensorboard=False, store_frequency=50)
estimator.set_properties(ene)
estimator.generate_representation(xyz, zs, method='fortran')
# NOTE(review): reads the attribute "g" of the estimator - presumably the generated representation; verify
print(estimator.g.shape)

# Doing cross validation
idx = list(range(n_samples))
# shuffle=False keeps the original order: the last 15% of the indices form the test set
idx_train, idx_test = modsel.train_test_split(idx, test_size=0.15, random_state=42, shuffle=False)

print("Starting the fitting...")
estimator.fit(idx_train)

# estimator.save_nn("saved_model")

# Predict for the training samples in two ways: from the stored indices and directly from xyz and zs
pred1 = estimator.predict(idx_train)
pred2 = estimator.predict_from_xyz(xyz[idx_train], zs[idx_train])
Esempio n. 19
0
n_samples = 300

# Only the last n_samples entries of each data set are used
xyz = np.array(data.get("xyz")[-n_samples:])
# NOTE(review): 2625.50 is presumably the Hartree -> kJ/mol conversion factor - confirm the units of "ene"
ene = np.array(data.get("ene")[-n_samples:]) * 2625.50
# Shift the energies so that they are relative to the first entry of the full data set
ene = ene - data.get("ene")[0] * 2625.50
zs = np.array(data["zs"][-n_samples:], dtype=np.int32)

# Creating the estimator
estimator = ARMP(iterations=100,
                 l1_reg=0.0001,
                 l2_reg=0.005,
                 learning_rate=0.0005,
                 representation='acsf',
                 representation_params={
                     "radial_rs": np.arange(0, 10, 3),
                     "angular_rs": np.arange(0, 10, 3),
                     "theta_s": np.arange(0, 3.14, 3)
                 },
                 tensorboard=True,
                 store_frequency=10,
                 tensorboard_subdir="tb")

# Hand the properties to the estimator and generate the representation from xyz and zs
estimator.set_properties(ene)
estimator.generate_representation(xyz, zs)

saved_dir = "saved_model"

# NOTE(review): presumably restores a network previously written to saved_dir - confirm against ARMP.load_nn
estimator.load_nn(saved_dir)

# One index per sample
idx = list(range(n_samples))
Esempio n. 20
0
    # List of properties
    Y = np.array([mol.properties for mol in training])
    Ys = np.array([mol.properties for mol in test])

    ## ------------- ** Setting up the estimator ** ---------------

    print(Z)
    print(Z.shape)

    estimator = ARMP(
        iterations=10,
        l1_reg=0.0,
        l2_reg=0.0,
        hidden_layer_sizes=(40, 20, 10),
        tensorboard=True,
        store_frequency=10,
        # batch_size=400,
        batch_size=n_train,
        learning_rate=0.1,
        # scoring_function="mae",
    )

    estimator.set_representations(representations=X)
    estimator.set_classes(Z)
    estimator.set_properties(Y)

    # idx = np.arange(0,100)

    # estimator.fit(idx)

    # score = estimator.score(idx)
# Make the energies relative to the reference energy
ene_isopent = ene_isopent - ref_ene
zs_isopent = np.array(data["zs"], dtype=np.int32)

# Shuffling the indices of the data and then selecting the first 7621 data points
idx = list(range(len(ene_isopent)))
shuffle(idx)
idx = idx[:7621]

# Appending the true energies to a list
predictions = [ene_isopent[idx]]

# Creating the estimator
acsf_params = {"nRs2":10, "nRs3":10, "nTs":10, "rcut":3.18, "acut":3.18, "zeta":52.779232035094125, "eta":1.4954812022150898}

estimator = ARMP(iterations=5283, batch_size=37, l1_reg=8.931599068573057e-06, l2_reg=3.535679697949907e-05,
                 learning_rate=0.0008170485394812195, representation_name='acsf', representation_params=acsf_params,
                 tensorboard=True, store_frequency=25, hidden_layer_sizes=(15,88))

# Putting the data into the model
estimator.set_properties(ene_isopent)
estimator.generate_representation(xyz_isopent, zs_isopent, method="fortran")
# NOTE(review): presumably restores a previously trained network from the "md-nn" directory - confirm
estimator.load_nn("md-nn")

# Predicting the energies
# (the predictions are appended after the true energies, so the list holds [true, predicted])
predictions.append(estimator.predict(idx))

# Scoring the results
score = estimator.score(idx)
print(score)

# Saving the predictions to a npz file
Esempio n. 22
0
from qml.aglaia.aglaia import ARMP
import glob
import numpy as np
from sklearn import model_selection as modsel

# NOTE(review): absolute path to an external volume - the script only runs on a machine where it is mounted
test_dir = "/Volumes/Transcend/repositories/my_qml_fork/qml/test/"

filenames = glob.glob(test_dir + "/qm7/*.xyz")
# Only column 1 of the property file is read
energies = np.loadtxt(test_dir + '/data/hof_qm7.txt', usecols=[1])
# glob returns the files in arbitrary order, so they are sorted in place
filenames.sort()

n_samples = 500

# Only the first n_samples files and properties are handed to the estimator
estimator = ARMP(representation_name="acsf", iterations=100)
estimator.generate_compounds(filenames[:n_samples])
estimator.set_properties(energies[:n_samples])
estimator.generate_representation(method="fortran")

# Shuffled split of the sample indices with 10% held out as test indices
idx = np.arange(0, n_samples)
idx_train, idx_test = modsel.train_test_split(idx,
                                              random_state=42,
                                              shuffle=True,
                                              test_size=0.1)

estimator.fit(idx_train)

# The score is computed on the training indices; idx_test is not used in this excerpt
estimator.score(idx_train)
# Creating the estimator
# Parameters passed to the estimator as representation_params for the 'acsf' representation
acsf_params = {
    "nRs2": 14,
    "nRs3": 14,
    "nTs": 14,
    "rcut": 3.29,
    "acut": 3.29,
    "zeta": 100.06564927139748,
    "eta": 39.81824764370754
}
estimator = ARMP(iterations=2633,
                 batch_size=22,
                 l1_reg=1.46e-05,
                 l2_reg=0.0001,
                 learning_rate=0.0013,
                 representation_name='acsf',
                 representation_params=acsf_params,
                 tensorboard=True,
                 store_frequency=25,
                 hidden_layer_sizes=(185, ))

# Hand the properties to the estimator and generate the representation with method="fortran"
estimator.set_properties(ene_isopent)
estimator.generate_representation(xyz_isopent, zs_isopent, method="fortran")

# Training the model on 3 folds of n data points
for n in n_samples:

    cv_idx = idx_train[:n]
    splitter = modsel.KFold(n_splits=3, random_state=42, shuffle=True)
    indices = splitter.split(cv_idx)
Esempio n. 24
0
# Parameters passed to the estimator as representation_params for the 'acsf' representation
acsf_params = {
    "nRs2": 15,
    "nRs3": 15,
    "nTs": 15,
    "rcut": 5,
    "acut": 5,
    "zeta": 220.127,
    "eta": 30.8065
}

# Generate estimator
estimator = ARMP(iterations=10,
                 l1_reg=0.0001,
                 l2_reg=0.005,
                 learning_rate=0.0005,
                 representation_name='acsf',
                 representation_params=acsf_params,
                 tensorboard=True,
                 store_frequency=2,
                 hidden_layer_sizes=(50, 30, 10),
                 batch_size=200)

# Hand the properties to the estimator and generate the representation from pad_xyz and pad_zs
estimator.set_properties(ene_isopent)
estimator.generate_representation(pad_xyz, pad_zs, method='fortran')

print("Generated the representations")
print(estimator.representation.shape)

# Shuffled split of the sample indices; no test_size is given, so the train_test_split default applies
idx = list(range(n_samples))
idx_train, idx_test = modsel.train_test_split(idx,
                                              random_state=42,
                                              shuffle=True)
Esempio n. 25
0
# Print the l1_reg and l2_reg values defined earlier in the script
print("The l1 regularisation values:")
print(l1_reg)
print("The l2 regularisation values:")
print(l2_reg)

# Explicit radial_rs, angular_rs and theta_s grids passed as representation_params
acsf_params = {
    "radial_rs": np.arange(0, 10, 0.5),
    "angular_rs": np.arange(0, 10, 0.5),
    "theta_s": np.arange(0, 3.14, 0.25)
}

# NOTE(review): the estimator is built with fixed l1_reg/l2_reg values, not with the variables printed above
estimator = ARMP(iterations=2000,
                 batch_size=256,
                 l1_reg=0.0001,
                 l2_reg=0.005,
                 learning_rate=0.00015,
                 representation='acsf',
                 representation_params=acsf_params,
                 tensorboard=True,
                 store_frequency=50)

# Hand the properties to the estimator and generate the representation from xyz and zs
estimator.set_properties(ene)
estimator.generate_representation(xyz, zs)

# Shuffled split of the sample indices with 15% held out as test indices
idx = list(range(n_samples))
idx_train, idx_test = modsel.train_test_split(idx,
                                              test_size=0.15,
                                              random_state=42,
                                              shuffle=True)

# Starts empty; filled by code outside this excerpt
all_scores = []
Esempio n. 26
0
import os
import numpy as np

## ------------- ** Loading the data ** ---------------

# The archive is located relative to the directory containing this script
current_dir = os.path.dirname(os.path.realpath(__file__))
data = np.load(current_dir + '/../test/data/local_slatm_ch4cn_light.npz')

# arr_0, arr_1 and arr_2 are used as representations, classes and properties respectively
representation = data["arr_0"]
zs = data["arr_1"]
energies = data["arr_2"]

## ------------- ** Setting up the estimator ** ---------------

estimator = ARMP(iterations=150,
                 l2_reg=0.0,
                 learning_rate=0.005,
                 hidden_layer_sizes=(40, 20, 10))

##  ------------- ** Fitting to the data ** ---------------

# The data is passed directly to fit instead of being stored in the estimator first
estimator.fit(x=representation, y=energies, classes=zs)

##  ------------- ** Predicting and scoring ** ---------------

score = estimator.score(x=representation, y=energies, classes=zs)

# NOTE(review): the score is negated before printing - presumably score returns the negative MAE; verify
print("The mean absolute error is %s kJ/mol." % (str(-score)))

energies_predict = estimator.predict(x=representation, classes=zs)
Esempio n. 27
0
from qml.aglaia.aglaia import ARMP
import os
import numpy as np

## ------------- ** Loading the data ** ---------------

# NOTE(review): current_dir is computed but not used below; the archive is loaded from an absolute path instead,
# so the script only runs on a machine where that path exists
current_dir = os.path.dirname(os.path.realpath(__file__))
data = np.load("/Volumes/Transcend/repositories/my_qml_fork/qml/test/data/local_slatm_ch4cn_light.npz")

# arr_0, arr_1 and arr_2 are used as representations, classes and properties respectively
representation = data["arr_0"]
zs = data["arr_1"]
energies = data["arr_2"]

## ------------- ** Setting up the estimator ** ---------------

estimator = ARMP(iterations=3000, learning_rate=0.075, l1_reg=0.0, l2_reg=0.0, tensorboard=True, store_frequency=50)

##  ------------- ** Fitting to the data ** ---------------

# The data is passed directly to fit instead of being stored in the estimator first
estimator.fit(x=representation, y=energies, classes=zs)

##  ------------- ** Predicting and scoring ** ---------------

score = estimator.score(x=representation, y=energies, classes=zs)

# NOTE(review): the score is negated before printing - presumably score returns the negative MAE; verify
print("The mean absolute error is %s kJ/mol." % (str(-score)))

energies_predict = estimator.predict(x=representation, classes=zs)