Beispiel #1
0
def test_score_3():
    """
    Smoke-test that ARMP can be fitted and scored with each scoring function.

    For every one of the 'mae', 'r2' and 'rmse' scoring functions a fresh
    estimator is created, fitted on the data set stored next to this test
    file and then scored on that same data. The test passes as long as none
    of these calls raises; the returned scores are not inspected.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))
    data_path = os.path.join(test_dir, "data", "local_slatm_ch4cn_light.npz")

    # np.load keeps the .npz archive open until it is explicitly closed, so
    # pull the arrays out inside a context manager to release the file handle.
    with np.load(data_path) as data:
        descriptor = data["arr_0"]
        classes = data["arr_1"]
        energies = data["arr_2"]

    # One independent estimator per scoring function, exercised in turn.
    for scoring_function in ("mae", "r2", "rmse"):
        estimator = ARMP(scoring_function=scoring_function)
        estimator.fit(x=descriptor, y=energies, classes=classes)
        estimator.score(x=descriptor, y=energies, classes=classes)
Beispiel #2
0
import os
import numpy as np

## ------------- ** Loading the data ** ---------------

current_dir = os.path.dirname(os.path.realpath(__file__))
data_path = current_dir + '/../test/data/local_slatm_ch4cn_light.npz'

# The .npz archive stays open for as long as the loaded object is alive, so
# read the three arrays inside a context manager that closes the file.
with np.load(data_path) as data:
    representation = data["arr_0"]
    zs = data["arr_1"]
    energies = data["arr_2"]

## ------------- ** Setting up the estimator ** ---------------

estimator = ARMP(iterations=150,
                 l2_reg=0.0,
                 learning_rate=0.005,
                 hidden_layer_sizes=(40, 20, 10))

##  ------------- ** Fitting to the data ** ---------------

estimator.fit(x=representation, y=energies, classes=zs)

##  ------------- ** Predicting and scoring ** ---------------

# Scoring is done on the same data that was used for fitting.
score = estimator.score(x=representation, y=energies, classes=zs)

# NOTE(review): the sign is flipped before printing, presumably because the
# estimator reports the negated mean absolute error - confirm against ARMP.
print("The mean absolute error is %s kJ/mol." % (str(-score)))

energies_predict = estimator.predict(x=representation, classes=zs)
Beispiel #3
0
## ------------- ** Loading the data ** ---------------

current_dir = os.path.dirname(os.path.realpath(__file__))
data_path = current_dir + '/../test/data/local_slatm_ch4cn_light.npz'

# The .npz archive stays open for as long as the loaded object is alive, so
# read the three arrays inside a context manager that closes the file.
with np.load(data_path) as data:
    descriptor = data["arr_0"]
    zs = data["arr_1"]
    energies = data["arr_2"]

## ------------- ** Setting up the estimator ** ---------------

estimator = ARMP(iterations=100, l2_reg=0.0)

# Register the full data set on the estimator; fit/score/predict below are
# then called with sample indices instead of the arrays themselves.
estimator.set_representations(representations=descriptor)
estimator.set_classes(zs)
estimator.set_properties(energies)

##  ------------- ** Fitting to the data ** ---------------

# Indices 0..99: the samples used for fitting, scoring and predicting.
idx = np.arange(0, 100)

estimator.fit(idx)

##  ------------- ** Predicting and scoring ** ---------------

score = estimator.score(idx)

# NOTE(review): the sign is flipped before printing, presumably because the
# estimator reports the negated mean absolute error - confirm against ARMP.
print("The mean absolute error is %s kJ/mol." % (str(-score)))

energies_predict = estimator.predict(idx)
Beispiel #4
0
print("Generated the representations")
print(estimator.representation.shape)

# Split the sample indices into a training and a test subset.
idx = list(range(n_samples))
idx_train, idx_test = modsel.train_test_split(idx,
                                              random_state=42,
                                              shuffle=True)

estimator.fit(idx_train)

# Open the HDF5 file in a context manager so the handle is closed as soon as
# the first 10 entries of each data set have been copied into numpy arrays.
with h5py.File(
        "/Volumes/Transcend/data_sets/CN_squalane/dft/squalane_cn_dft.hdf5",
        "r") as data_squal:
    xyz_squal = np.array(data_squal.get("xyz")[:10])
    zs_squal = np.array(data_squal.get("zs")[:10], dtype=np.int32)
    # NOTE(review): 2625.50 looks like a Hartree -> kJ/mol conversion factor;
    # confirm the units of the stored energies.
    ene_squal = np.array(data_squal.get("ene")[:10]) * 2625.50

# Shift the energies by the reference energy defined earlier in the script.
ene_squal = ene_squal - ref_ene

estimator.score(idx_test)

# Predictions for the external structures (from coordinates and nuclear
# charges) and for the held-out test indices.
pred1 = estimator.predict_from_xyz(xyz_squal, zs_squal)
print("Done squal pred")
pred2 = estimator.predict(idx_test)

# estimator.save_nn()
#
# print(pred1)
# print(pred2)
#
plt.scatter(pred2, pred1)
plt.show()
Beispiel #5
0
# Number of compounds used in this example. The properties, the generated
# compounds and the index list must all cover the same samples, so the count
# is defined once instead of being repeated as a literal.
N_COMPOUNDS = 100

estimator = ARMP(iterations=6000,
                 representation_name='acsf',
                 representation_params=acsf_params,
                 l1_reg=0.0,
                 l2_reg=0.0,
                 scoring_function="rmse",
                 tensorboard=False,
                 store_frequency=10,
                 learning_rate=0.075)

# Register the energies and the compound files, then build the
# representations from the compounds.
estimator.set_properties(energies[:N_COMPOUNDS])
estimator.generate_compounds(filenames[:N_COMPOUNDS])
estimator.generate_representation(method="tf")
print(estimator.representation.shape)

idx = list(range(N_COMPOUNDS))

# NOTE(review): test_size=0 requests an empty test split; newer scikit-learn
# releases may reject a zero test size - confirm with the installed version.
idx_train, idx_test = modsel.train_test_split(idx,
                                              test_size=0,
                                              random_state=42,
                                              shuffle=True)

estimator.fit(idx_train)

# Score and predict on the training indices themselves.
score = estimator.score(idx_train)
print("The RMSE is %s kcal/mol." % (str(score)))

ene_pred = estimator.predict(idx_train)

# Reference energies against predicted energies for the training samples.
plt.scatter(energies[idx_train], ene_pred)
plt.show()
# For every training-set size n, run a 3-fold cross-validation on the first n
# training indices and record the per-fold scores.
for n in n_samples:

    cv_idx = idx_train[:n]
    splitter = modsel.KFold(n_splits=3, random_state=42, shuffle=True)

    scores_per_fold = []
    traj_scores_per_fold = []

    # Each split yields positions into cv_idx for the training and test fold.
    for train_positions, test_positions in splitter.split(cv_idx):
        idx_train_fold = cv_idx[train_positions]
        idx_test_fold = cv_idx[test_positions]

        estimator.fit(idx_train_fold)

        # Score on the held-out fold first, then on the separate test indices.
        scores_per_fold.append(estimator.score(idx_test_fold))
        traj_scores_per_fold.append(estimator.score(idx_test))

        # Reset the default TensorFlow graph before the next fold is fitted.
        tf.reset_default_graph()

    scores.append(scores_per_fold)
    traj_scores.append(traj_scores_per_fold)

# Store the sample sizes and both score tables in one .npz archive; the arrays
# are passed positionally, so they are saved as arr_0, arr_1 and arr_2.
np.savez("./plot/scores_vr.npz",
         np.asarray(n_samples),
         np.asarray(scores),
         np.asarray(traj_scores))
        l2_reg=0.0,
        hidden_layer_sizes=(40, 20, 10),
        tensorboard=True,
        store_frequency=10,
        # batch_size=400,
        batch_size=n_train,
        learning_rate=0.1,
        # scoring_function="mae",
    )

    estimator.set_representations(representations=X)
    estimator.set_classes(Z)
    estimator.set_properties(Y)

    # idx = np.arange(0,100)

    # estimator.fit(idx)

    # score = estimator.score(idx)

    # estimator.fit(x=representation, y=energies, classes=zs)
    estimator.fit(x=X, y=Y, classes=Z)

    ##  ------------- ** Predicting and scoring ** ---------------

    score = estimator.score(x=X, y=Y, classes=Z)

    print("The mean absolute error is %s kJ/mol." % (str(-score)))

    # energies_predict = estimator.predict(idx)
    # print(energies_predict)