Beispiel #1
0
def test_set_properties():
    """
    Check that ``set_properties`` stores the supplied values on the estimator.

    The ``properties`` attribute must be ``None`` on a freshly constructed
    estimator and must equal the array handed to ``set_properties`` afterwards.

    :return: None
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    # Only column 1 of the training file is read and used as the property values.
    energies = np.loadtxt(test_dir + '/CN_isobutane/prop_kjmol_training.txt',
                          usecols=[1])

    estimator = ARMP(representation='slatm')

    # Identity check against the singleton: ``== None`` would be evaluated
    # element-wise if the attribute ever held an array, which makes the
    # assertion ambiguous instead of failing cleanly.
    assert estimator.properties is None

    estimator.set_properties(energies)

    assert np.all(estimator.properties == energies)
Beispiel #2
0
def test_fit_2():
    """
    Test the second way of fitting: the representations, classes and
    properties are stored in the estimator first, and ``fit`` is then called
    with the indices of the samples to use.

    :return: None
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    # ``np.load`` on an .npz archive keeps the underlying file open until the
    # returned object is closed, so read the arrays inside a ``with`` block.
    with np.load(test_dir + "/data/local_slatm_ch4cn_light.npz") as data:
        descriptor = data["arr_0"]
        classes = data["arr_1"]
        energies = data["arr_2"]

    estimator = ARMP()
    estimator.set_representations(representations=descriptor)
    estimator.set_classes(classes=classes)
    estimator.set_properties(energies)

    # Fit on the samples with indices 0..99.
    idx = np.arange(0, 100)
    estimator.fit(idx)
Beispiel #3
0
def test_fit_1():
    """
    Exercise the first fitting workflow: compounds are built from xyz files,
    the representation is generated from those compounds, and the estimator
    is then fitted on the sample indices.
    """
    here = os.path.dirname(os.path.realpath(__file__))

    # Sorted list of structure files, plus column 1 of the property file.
    xyz_files = sorted(glob.glob(here + "/CN_isobutane/*.xyz"))
    all_energies = np.loadtxt(here + '/CN_isobutane/prop_kjmol_training.txt',
                              usecols=[1])

    model = ARMP(representation="acsf")
    # Only the first 50 files and the first 50 property values are used.
    model.generate_compounds(xyz_files[:50])
    model.set_properties(all_energies[:50])
    model.generate_representation()

    model.fit(np.arange(50))
Beispiel #4
0
## ------------- ** Load the stored data set ** ---------------

# The archive path is resolved relative to the directory containing this script.
current_dir = os.path.dirname(os.path.realpath(__file__))
data = np.load(current_dir + '/../test/data/local_slatm_ch4cn_light.npz')

# Unpack the three arrays stored in the archive in a single statement.
descriptor, zs, energies = (data[key] for key in ("arr_0", "arr_1", "arr_2"))

## ------------- ** Build the estimator and hand it the data ** ---------------

estimator = ARMP(iterations=100, l2_reg=0.0)
estimator.set_representations(representations=descriptor)
estimator.set_classes(zs)
estimator.set_properties(energies)

##  ------------- ** Fit on the first 100 samples ** ---------------

idx = np.arange(100)
estimator.fit(idx)

##  ------------- ** Score and predict on the same samples ** ---------------

score = estimator.score(idx)

# The sign of the score is flipped before it is reported.
print("The mean absolute error is %s kJ/mol." % str(-score))

energies_predict = estimator.predict(idx)
Beispiel #5
0
    "eta": 30.8065
}

# Build the estimator with an ACSF representation; `acsf_params` is defined
# before this excerpt.
estimator = ARMP(iterations=10,
                 l1_reg=0.0001,
                 l2_reg=0.005,
                 learning_rate=0.0005,
                 representation_name='acsf',
                 representation_params=acsf_params,
                 tensorboard=True,
                 store_frequency=2,
                 hidden_layer_sizes=(50, 30, 10),
                 batch_size=200)

# Store the target values, then generate the representation from the
# coordinates and atomic-number arrays (all three are defined before this
# excerpt).
estimator.set_properties(ene_isopent)
estimator.generate_representation(pad_xyz, pad_zs, method='fortran')

print("Generated the representations")
print(estimator.representation.shape)

# Split the sample indices into train and test parts with a fixed seed.
# NOTE(review): `modsel` is presumably `sklearn.model_selection`; no
# test_size is given, so the library default split is used - confirm.
idx = list(range(n_samples))
idx_train, idx_test = modsel.train_test_split(idx,
                                              random_state=42,
                                              shuffle=True)

estimator.fit(idx_train)

# Open a second data set read-only. The handle is not closed within this
# excerpt; presumably it is used by code that follows - verify.
data_squal = h5py.File(
    "/Volumes/Transcend/data_sets/CN_squalane/dft/squalane_cn_dft.hdf5", "r")
Beispiel #6
0
    "rcut": 5,
    "acut": 5,
    "zeta": 220.127,
    "eta": 30.8065
}
# Build the estimator with an ACSF representation, no L1/L2 regularisation
# and "rmse" as the scoring function; `acsf_params` is the dict whose end is
# visible just above.
estimator = ARMP(iterations=6000,
                 representation_name='acsf',
                 representation_params=acsf_params,
                 l1_reg=0.0,
                 l2_reg=0.0,
                 scoring_function="rmse",
                 tensorboard=False,
                 store_frequency=10,
                 learning_rate=0.075)

# Use only the first 100 samples; `energies` and `filenames` are defined
# before this excerpt.
estimator.set_properties(energies[:100])
estimator.generate_compounds(filenames[:100])
estimator.generate_representation(method="tf")
print(estimator.representation.shape)

idx = list(range(100))

# Shuffle the indices with a fixed seed while asking for an empty test part.
# NOTE(review): `modsel` is presumably `sklearn.model_selection`; recent
# scikit-learn releases may reject test_size=0 - confirm against the version
# in use.
idx_train, idx_test = modsel.train_test_split(idx,
                                              test_size=0,
                                              random_state=42,
                                              shuffle=True)

estimator.fit(idx_train)

# The score is computed on the same indices that were used for fitting.
score = estimator.score(idx_train)
print("The RMSE is %s kcal/mol." % (str(score)))
Beispiel #7
0
from qml.aglaia.aglaia import ARMP
import glob
import numpy as np
from sklearn import model_selection as modsel

# Location of the test data on the author's machine.
test_dir = "/Volumes/Transcend/repositories/my_qml_fork/qml/test/"

# Structure files in sorted order, plus column 1 of the property file.
filenames = sorted(glob.glob(test_dir + "/qm7/*.xyz"))
energies = np.loadtxt(test_dir + '/data/hof_qm7.txt', usecols=[1])

# Number of leading samples taken from both lists.
n_samples = 500

estimator = ARMP(iterations=100, representation_name="acsf")
estimator.generate_compounds(filenames[:n_samples])
estimator.set_properties(energies[:n_samples])
estimator.generate_representation(method="fortran")

# Shuffled split of the sample indices with a fixed seed; 10% is held out.
idx = np.arange(n_samples)
idx_train, idx_test = modsel.train_test_split(
    idx,
    test_size=0.1,
    shuffle=True,
    random_state=42,
)

estimator.fit(idx_train)

# The score on the training indices is computed but its value is not kept.
estimator.score(idx_train)
Beispiel #8
0
    "radial_rs": np.arange(0, 10, 0.5),
    "angular_rs": np.arange(0, 10, 0.5),
    "theta_s": np.arange(0, 3.14, 0.25)
}

# Build the estimator with an ACSF representation; `acsf_params` is the dict
# whose end is visible just above.
estimator = ARMP(iterations=2000,
                 batch_size=256,
                 l1_reg=0.0001,
                 l2_reg=0.005,
                 learning_rate=0.00015,
                 representation='acsf',
                 representation_params=acsf_params,
                 tensorboard=True,
                 store_frequency=50)

# Store the target values and generate the representation; `ene`, `xyz` and
# `zs` are defined before this excerpt.
estimator.set_properties(ene)
estimator.generate_representation(xyz, zs)

# Split the sample indices with a fixed seed, holding out 15%.
# NOTE(review): `modsel` is presumably `sklearn.model_selection` - confirm.
idx = list(range(n_samples))
idx_train, idx_test = modsel.train_test_split(idx,
                                              test_size=0.15,
                                              random_state=42,
                                              shuffle=True)

all_scores = []

# Iterate over `learning_rate`, `l1_reg` and `l2_reg`, all defined before
# this excerpt.
# NOTE(review): in the visible body `lr`, `l1` and `l2` are never applied to
# the estimator and nothing is appended to `all_scores`, so every iteration
# refits the same configuration. The loop body looks truncated - confirm
# against the full script.
for lr in learning_rate:
    for l1 in l1_reg:
        for l2 in l2_reg:

            estimator.fit(idx_train)
    # Build the estimator; `n_train` is defined before this excerpt and is
    # used as the batch size.
    estimator = ARMP(
        iterations=10,
        l1_reg=0.0,
        l2_reg=0.0,
        hidden_layer_sizes=(40, 20, 10),
        tensorboard=True,
        store_frequency=10,
        # batch_size=400,
        batch_size=n_train,
        learning_rate=0.1,
        # scoring_function="mae",
    )

    # Store the representations, classes and properties on the estimator;
    # `X`, `Z` and `Y` are defined before this excerpt.
    estimator.set_representations(representations=X)
    estimator.set_classes(Z)
    estimator.set_properties(Y)

    # Disabled alternative: fit and score through sample indices.
    # idx = np.arange(0,100)

    # estimator.fit(idx)

    # score = estimator.score(idx)

    # The same arrays are also passed explicitly to fit and score below.
    # estimator.fit(x=representation, y=energies, classes=zs)
    estimator.fit(x=X, y=Y, classes=Z)

    ##  ------------- ** Predicting and scoring ** ---------------

    score = estimator.score(x=X, y=Y, classes=Z)

    # The sign of the score is flipped before it is reported.
    # NOTE(review): the message says "mean absolute error" but the explicit
    # scoring_function argument above is commented out - confirm the default.
    print("The mean absolute error is %s kJ/mol." % (str(-score)))
    "eta": 39.81824764370754
}
# Estimator configuration; `acsf_params` is the dict whose end is visible
# just above.
estimator = ARMP(
    iterations=2633,
    batch_size=22,
    l1_reg=1.46e-05,
    l2_reg=0.0001,
    learning_rate=0.0013,
    representation_name='acsf',
    representation_params=acsf_params,
    tensorboard=True,
    store_frequency=25,
    hidden_layer_sizes=(185,),
)

# Restore the previously trained network, then attach the target values.
estimator.load_nn("../trained_nn/vr-nn")
estimator.set_properties(ene_surface)

# Generate the representation and measure how long the call takes.
start = time.time()
estimator.generate_representation(xyz_surface, zs_surface, method="fortran")
end = time.time()
print("The time taken to generate the representations is %s s" % str(end - start))
print("The shape of the representations is %s" % str(estimator.representation.shape))

# Predict for every sample index.
idx = list(range(n_samples))
predictions = estimator.predict(idx)

# Report the mean absolute error
Beispiel #11
0
# Three samples with three atoms each: coordinates, per-atom classes and one
# target value per sample.
xyz = np.array([
    [[0, 1, 0], [0, 1, 1], [1, 0, 1]],
    [[1, 2, 2], [3, 1, 2], [1, 3, 4]],
    [[4, 1, 2], [0.5, 5, 6], [-1, 2, 3]],
])
zs = np.array([[1, 2, 3]] * 3)

ene_true = np.array([0.5, 0.9, 1.0])

estimator = ARMP(
    iterations=10,
    l1_reg=0.0001,
    l2_reg=0.005,
    learning_rate=0.0005,
    representation='acsf',
    representation_params={
        "radial_rs": np.arange(0, 10, 5),
        "angular_rs": np.arange(0, 10, 5),
        "theta_s": np.arange(0, 3.14, 3),
    },
    tensorboard=True,
    store_frequency=10,
)

estimator.set_properties(ene_true)
estimator.generate_representation(xyz, zs)

# One index per sample.
idx = list(range(len(xyz)))

# First round: fit, save the network to disk and predict.
estimator.fit(idx)
estimator.save_nn(save_dir="temp")
pred1 = estimator.predict(idx)

# Second round: set the loaded-model flag, fit again and predict again.
estimator.loaded_model = True
estimator.fit(idx)
pred2 = estimator.predict(idx)

estimator.session.close()