Beispiel #1
0
def test_fit_3():
    """
    This function tests the thrid way of fitting the descriptor: the data is passed directly to the fit function.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    data = np.load(test_dir + "/data/local_slatm_ch4cn_light.npz")
    descriptor = data["arr_0"]
    classes = data["arr_1"]
    energies = data["arr_2"]

    estimator = ARMP()
    estimator.fit(x=descriptor, y=energies, classes=classes)
Beispiel #2
0
def test_predict_3():
    test_dir = os.path.dirname(os.path.realpath(__file__))

    data = np.load(test_dir + "/data/local_slatm_ch4cn_light.npz")
    descriptor = data["arr_0"]
    classes = data["arr_1"]
    energies = data["arr_2"]

    estimator = ARMP()
    estimator.fit(x=descriptor, y=energies, classes=classes)
    energies_pred = estimator.predict(x=descriptor, classes=classes)

    assert energies.shape == energies_pred.shape
Beispiel #3
0
def test_fit_2():
    """
    This function tests the second way of fitting the descriptor: the data is passed by storing the compounds in the
    class.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    data = np.load(test_dir + "/data/local_slatm_ch4cn_light.npz")
    descriptor = data["arr_0"]
    classes = data["arr_1"]
    energies = data["arr_2"]

    estimator = ARMP()
    estimator.set_representations(representations=descriptor)
    estimator.set_classes(classes=classes)
    estimator.set_properties(energies)

    idx = np.arange(0, 100)
    estimator.fit(idx)
Beispiel #4
0
def test_fit_1():
    """
    This function tests the first way of fitting the descriptor: the data is passed by first creating compounds and then
    the descriptors are created from the compounds.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    filenames = glob.glob(test_dir + "/CN_isobutane/*.xyz")
    energies = np.loadtxt(test_dir + '/CN_isobutane/prop_kjmol_training.txt',
                          usecols=[1])
    filenames.sort()

    estimator = ARMP(representation="acsf")
    estimator.generate_compounds(filenames[:50])
    estimator.set_properties(energies[:50])
    estimator.generate_representation()

    idx = np.arange(0, 50)
    estimator.fit(idx)
Beispiel #5
0
def test_score_3():
    """
    This function tests that all the scoring functions work.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))

    data = np.load(test_dir + "/data/local_slatm_ch4cn_light.npz")
    descriptor = data["arr_0"]
    classes = data["arr_1"]
    energies = data["arr_2"]

    estimator_1 = ARMP(scoring_function='mae')
    estimator_1.fit(x=descriptor, y=energies, classes=classes)
    estimator_1.score(x=descriptor, y=energies, classes=classes)

    estimator_2 = ARMP(scoring_function='r2')
    estimator_2.fit(x=descriptor, y=energies, classes=classes)
    estimator_2.score(x=descriptor, y=energies, classes=classes)

    estimator_3 = ARMP(scoring_function='rmse')
    estimator_3.fit(x=descriptor, y=energies, classes=classes)
    estimator_3.score(x=descriptor, y=energies, classes=classes)
Beispiel #6
0
import os
import numpy as np

## ------------- ** Loading the data ** ---------------

current_dir = os.path.dirname(os.path.realpath(__file__))
data = np.load(current_dir + '/../test/data/local_slatm_ch4cn_light.npz')

representation = data["arr_0"]
zs = data["arr_1"]
energies = data["arr_2"]

## ------------- ** Setting up the estimator ** ---------------

estimator = ARMP(iterations=150,
                 l2_reg=0.0,
                 learning_rate=0.005,
                 hidden_layer_sizes=(40, 20, 10))

##  ------------- ** Fitting to the data ** ---------------

estimator.fit(x=representation, y=energies, classes=zs)

##  ------------- ** Predicting and scoring ** ---------------

score = estimator.score(x=representation, y=energies, classes=zs)

print("The mean absolute error is %s kJ/mol." % (str(-score)))

energies_predict = estimator.predict(x=representation, classes=zs)
Beispiel #7
0
## ------------- ** Loading the data ** ---------------

current_dir = os.path.dirname(os.path.realpath(__file__))
data = np.load(current_dir + '/../test/data/local_slatm_ch4cn_light.npz')

descriptor = data["arr_0"]
zs = data["arr_1"]
energies = data["arr_2"]

## ------------- ** Setting up the estimator ** ---------------

estimator = ARMP(iterations=100, l2_reg=0.0)

estimator.set_representations(representations=descriptor)
estimator.set_classes(zs)
estimator.set_properties(energies)

##  ------------- ** Fitting to the data ** ---------------

idx = np.arange(0, 100)

estimator.fit(idx)

##  ------------- ** Predicting and scoring ** ---------------

score = estimator.score(idx)

print("The mean absolute error is %s kJ/mol." % (str(-score)))

energies_predict = estimator.predict(idx)
Beispiel #8
0
                 store_frequency=2,
                 hidden_layer_sizes=(50, 30, 10),
                 batch_size=200)

estimator.set_properties(ene_isopent)
estimator.generate_representation(pad_xyz, pad_zs, method='fortran')

print("Generated the representations")
print(estimator.representation.shape)

idx = list(range(n_samples))
idx_train, idx_test = modsel.train_test_split(idx,
                                              random_state=42,
                                              shuffle=True)

estimator.fit(idx_train)

data_squal = h5py.File(
    "/Volumes/Transcend/data_sets/CN_squalane/dft/squalane_cn_dft.hdf5", "r")

xyz_squal = np.array(data_squal.get("xyz")[:10])
zs_squal = np.array(data_squal.get("zs")[:10], dtype=np.int32)
ene_squal = np.array(data_squal.get("ene")[:10]) * 2625.50
ene_squal = ene_squal - ref_ene

estimator.score(idx_test)

pred1 = estimator.predict_from_xyz(xyz_squal, zs_squal)
print("Done squal pred")
pred2 = estimator.predict(idx_test)
# Training the model on 3 folds of n data points
for n in n_samples:

    cv_idx = idx_train[:n]
    splitter = modsel.KFold(n_splits=3, random_state=42, shuffle=True)
    indices = splitter.split(cv_idx)

    scores_per_fold = []
    traj_scores_per_fold = []

    for item in indices:
        idx_train_fold = cv_idx[item[0]]
        idx_test_fold = cv_idx[item[1]]

        estimator.fit(idx_train_fold)

        score = estimator.score(idx_test_fold)
        traj_score = estimator.score(idx_test)
        scores_per_fold.append(score)
        traj_scores_per_fold.append(traj_score)

        tf.reset_default_graph()

    scores.append(scores_per_fold)
    traj_scores.append(traj_scores_per_fold)

# Saving the data to an .npz file
np.savez("./plot/scores_vr.npz", np.asarray(n_samples), np.asarray(scores),
         np.asarray(traj_scores))
Beispiel #10
0
        l2_reg=0.0,
        hidden_layer_sizes=(40, 20, 10),
        tensorboard=True,
        store_frequency=10,
        # batch_size=400,
        batch_size=n_train,
        learning_rate=0.1,
        # scoring_function="mae",
    )

    estimator.set_representations(representations=X)
    estimator.set_classes(Z)
    estimator.set_properties(Y)

    # idx = np.arange(0,100)

    # estimator.fit(idx)

    # score = estimator.score(idx)

    # estimator.fit(x=representation, y=energies, classes=zs)
    estimator.fit(x=X, y=Y, classes=Z)

    ##  ------------- ** Predicting and scoring ** ---------------

    score = estimator.score(x=X, y=Y, classes=Z)

    print("The mean absolute error is %s kJ/mol." % (str(-score)))

    # energies_predict = estimator.predict(idx)
    # print(energies_predict)
Beispiel #11
0
               [1, 2, 3]])

ene_true = np.array([0.5, 0.9, 1.0])

estimator = ARMP(iterations=10, l1_reg=0.0001, l2_reg=0.005, learning_rate=0.0005, representation='acsf',
                 representation_params={"radial_rs": np.arange(0, 10, 5), "angular_rs": np.arange(0, 10, 5),
                                        "theta_s": np.arange(0, 3.14, 3)},
                 tensorboard=True, store_frequency=10
                 )

estimator.set_properties(ene_true)
estimator.generate_representation(xyz, zs)

idx = list(range(xyz.shape[0]))

estimator.fit(idx)
estimator.save_nn(save_dir="temp")

pred1 = estimator.predict(idx)

estimator.loaded_model = True

estimator.fit(idx)

pred2 = estimator.predict(idx)
estimator.session.close()
tf.reset_default_graph()

new_estimator = ARMP(iterations=10, l1_reg=0.0001, l2_reg=0.005, learning_rate=0.0005, representation='acsf',
                 representation_params={"radial_rs": np.arange(0, 10, 5), "angular_rs": np.arange(0, 10, 5),
                                        "theta_s": np.arange(0, 3.14, 3)},
ene_isopent = ene_isopent - ref_ene
zs_isopent = np.array(data.get("zs"), dtype=np.int32)
traj_idx = np.array(data.get("traj_idx"))
file_idx = np.array(data.get("Filenumber"))

# Finding the indices of the last trajectory so that it can be used as a test set
idx_test = np.where(traj_idx==22)[0]
idx_train = np.where(traj_idx!=22)[0]
shuffle(idx_train)

# Making sure that the model is trained on the same number of samples as the MD-NN
idx_train_half = idx_train[:7621]

# Creating the estimator
acsf_params = {"nRs2":14, "nRs3":14, "nTs":14, "rcut":3.29, "acut":3.29, "zeta":100.06564927139748, "eta":39.81824764370754}
estimator = ARMP(iterations=2633, batch_size=22, l1_reg=1.46e-05, l2_reg=0.0001, learning_rate=0.0013, representation_name='acsf',
                 representation_params=acsf_params, tensorboard=True, store_frequency=25, hidden_layer_sizes=(185,))

estimator.set_properties(ene_isopent)
estimator.generate_representation(xyz_isopent, zs_isopent, method="fortran")

# Fitting the estimator and scoring it
estimator.fit(idx_train_half)
score = estimator.score(idx_test)

# Saving the model for later reuse
model_name = "vr-nn"
estimator.save_nn(model_name)

print(score)