コード例 #1
0
ファイル: test_armp.py プロジェクト: charnley/qml
def test_fit_1():
    """
    Exercise the compound-based fitting route: compounds are generated from the xyz files, the representation is
    generated from those compounds, and the estimator is then fitted on the sample indices.
    """
    n_used = 50
    here = os.path.dirname(os.path.realpath(__file__))

    # The file list is sorted before slicing so the selected files are deterministic.
    xyz_files = sorted(glob.glob(here + "/CN_isobutane/*.xyz"))
    ref_energies = np.loadtxt(here + '/CN_isobutane/prop_kjmol_training.txt', usecols=[1])

    model = ARMP(representation="acsf")
    model.generate_compounds(xyz_files[:n_used])
    model.set_properties(ref_energies[:n_used])
    model.generate_representation()

    # fit() is given the indices of the samples to train on.
    model.fit(np.arange(n_used))
コード例 #2
0
}

# Build the ARMP estimator with explicit training hyper-parameters. The ACSF
# settings come from `acsf_params`, the dict that is closed just above.
estimator = ARMP(iterations=10,
                 l1_reg=0.0001,
                 l2_reg=0.005,
                 learning_rate=0.0005,
                 representation_name='acsf',
                 representation_params=acsf_params,
                 tensorboard=True,
                 store_frequency=2,
                 hidden_layer_sizes=(50, 30, 10),
                 batch_size=200)

# Register the target values, then generate the representation from the
# `pad_xyz` / `pad_zs` arrays (defined earlier in the script; presumably
# padded coordinates and nuclear charges - confirm).
estimator.set_properties(ene_isopent)
estimator.generate_representation(pad_xyz, pad_zs, method='fortran')

print("Generated the representations")
print(estimator.representation.shape)

# The split is done on sample indices, not on the data: fit() below is
# called with the training indices only.
idx = list(range(n_samples))
idx_train, idx_test = modsel.train_test_split(idx,
                                              random_state=42,
                                              shuffle=True)

estimator.fit(idx_train)

# Open the squalane data set read-only.
# NOTE(review): the path is absolute and machine-specific, and the handle is
# not closed anywhere in the lines visible here.
data_squal = h5py.File(
    "/Volumes/Transcend/data_sets/CN_squalane/dft/squalane_cn_dft.hdf5", "r")

# Only the first 10 entries of the "xyz" dataset are loaded.
xyz_squal = np.array(data_squal.get("xyz")[:10])
コード例 #3
0
    "zeta": 220.127,
    "eta": 30.8065
}
# Build the estimator. `acsf_params` is the ACSF settings dict closed just
# above; both regularisation terms are set to zero and the scoring function
# is RMSE.
estimator = ARMP(iterations=6000,
                 representation_name='acsf',
                 representation_params=acsf_params,
                 l1_reg=0.0,
                 l2_reg=0.0,
                 scoring_function="rmse",
                 tensorboard=False,
                 store_frequency=10,
                 learning_rate=0.075)

# The same [:100] slice is applied to the energies and to the file names.
estimator.set_properties(energies[:100])
estimator.generate_compounds(filenames[:100])
estimator.generate_representation(method="tf")
print(estimator.representation.shape)

idx = list(range(100))

# Every sample is used for training, so only a reproducible shuffle of the
# indices is needed. This used to be requested through
# train_test_split(..., test_size=0), which current scikit-learn releases
# reject with a ValueError (an integer test_size must be positive).
from sklearn.utils import shuffle as _seeded_shuffle

idx_train = _seeded_shuffle(idx, random_state=42)
idx_test = []  # nothing is held out in this script

estimator.fit(idx_train)

# Scored on the training indices themselves (there is no test set here).
score = estimator.score(idx_train)
print("The RMSE is %s kcal/mol." % (str(score)))

ene_pred = estimator.predict(idx_train)
コード例 #4
0
from qml.aglaia.aglaia import ARMP
import glob
import numpy as np
from sklearn import model_selection as modsel

# Location of the test data and number of samples used by this script.
test_dir = "/Volumes/Transcend/repositories/my_qml_fork/qml/test/"
n_samples = 500

# Sorted list of xyz files, plus column 1 of the property file.
filenames = sorted(glob.glob(test_dir + "/qm7/*.xyz"))
energies = np.loadtxt(test_dir + '/data/hof_qm7.txt', usecols=[1])

# Build the estimator from the first n_samples files and property values.
estimator = ARMP(iterations=100, representation_name="acsf")
estimator.generate_compounds(filenames[:n_samples])
estimator.set_properties(energies[:n_samples])
estimator.generate_representation(method="fortran")

# Shuffled split of the sample indices with 10% held out.
idx = np.arange(n_samples)
idx_train, idx_test = modsel.train_test_split(
    idx, test_size=0.1, shuffle=True, random_state=42)

# Fit on the training indices, then score on the same indices.
estimator.fit(idx_train)
estimator.score(idx_train)
コード例 #5
0
ファイル: make_pickle.py プロジェクト: SilviaAmAm/qml_scripts
    "angular_rs": np.arange(0, 10, 0.5),
    "theta_s": np.arange(0, 3.14, 0.25)
}

# Build the estimator with fixed hyper-parameters; `acsf_params` is the dict
# closed just above.
estimator = ARMP(iterations=2000,
                 batch_size=256,
                 l1_reg=0.0001,
                 l2_reg=0.005,
                 learning_rate=0.00015,
                 representation='acsf',
                 representation_params=acsf_params,
                 tensorboard=True,
                 store_frequency=50)

# Attach the target values, then generate the representation directly from
# `xyz` and `zs` (generate_compounds is not called in the lines shown here).
estimator.set_properties(ene)
estimator.generate_representation(xyz, zs)

# Shuffled split of the sample indices with a fixed seed; 15% is held out.
idx = list(range(n_samples))
idx_train, idx_test = modsel.train_test_split(idx,
                                              test_size=0.15,
                                              random_state=42,
                                              shuffle=True)

# Presumably filled with one score per hyper-parameter combination further
# down - the code that appends to it is outside this excerpt.
all_scores = []

# Nested loop over `learning_rate`, `l1_reg` and `l2_reg` (iterables defined
# elsewhere in the script).
# NOTE(review): in the lines visible here `lr`, `l1` and `l2` are never
# applied to `estimator` before fit(), so each iteration would train with
# the constructor's values - confirm the rest of the loop body sets them.
for lr in learning_rate:
    for l1 in l1_reg:
        for l2 in l2_reg:

            estimator.fit(idx_train)
            score = estimator.score(idx_test)
コード例 #6
0
    "zeta": 100.06564927139748,
    "eta": 39.81824764370754
}
# Build the estimator with a single hidden layer of 185 units;
# `acsf_params` is the dict closed just above.
estimator = ARMP(iterations=2633,
                 batch_size=22,
                 l1_reg=1.46e-05,
                 l2_reg=0.0001,
                 learning_rate=0.0013,
                 representation_name='acsf',
                 representation_params=acsf_params,
                 tensorboard=True,
                 store_frequency=25,
                 hidden_layer_sizes=(185, ))

# Attach the target values and generate the representation from the
# coordinate and `zs` arrays.
estimator.set_properties(ene_isopent)
estimator.generate_representation(xyz_isopent, zs_isopent, method="fortran")

# Training the model on 3 folds of n data points
for n in n_samples:

    # Cross-validation is restricted to the first n training indices.
    # NOTE(review): the indexing `cv_idx[item[0]]` below needs `idx_train`
    # to support array indexing (e.g. a NumPy array, not a list) - confirm
    # where `idx_train` is created.
    cv_idx = idx_train[:n]
    splitter = modsel.KFold(n_splits=3, random_state=42, shuffle=True)
    # split() yields one (train positions, test positions) pair per fold;
    # the positions refer to entries of `cv_idx`.
    indices = splitter.split(cv_idx)

    # Per-fold results for this value of n (filled outside this excerpt).
    scores_per_fold = []
    traj_scores_per_fold = []

    for item in indices:
        # Map the fold positions back to the actual sample indices.
        idx_train_fold = cv_idx[item[0]]
        idx_test_fold = cv_idx[item[1]]
コード例 #7
0
                 l1_reg=1.46e-05,
                 l2_reg=0.0001,
                 learning_rate=0.0013,
                 representation_name='acsf',
                 representation_params=acsf_params,
                 tensorboard=True,
                 store_frequency=25,
                 hidden_layer_sizes=(185, ))

# Load the previously trained model from a relative path, then attach the
# reference values for the new data set.
estimator.load_nn("../trained_nn/vr-nn")
estimator.set_properties(ene_surface)

# Generate the representation and measure the wall-clock time it takes.
start = time.time()
estimator.generate_representation(xyz_surface, zs_surface, method="fortran")
end = time.time()
print("The time taken to generate the representations is %s s" %
      (str(end - start)))
print("The shape of the representations is %s" %
      (str(estimator.representation.shape)))

# Predict for the sample indices 0 .. n_samples - 1.
idx = list(range(n_samples))
predictions = estimator.predict(idx)

# Compare the predictions with the reference values and print the mean
# absolute error with two decimals.
# NOTE(review): assumes `ene_surface` has exactly n_samples entries in the
# same order as `idx` - confirm.
mae = mean_absolute_error(ene_surface, predictions)
print("The MAE is %.2f kJ/mol" % mae)

# Saving the results to a HDF5 file
コード例 #8
0
ファイル: ARMP_1.py プロジェクト: charnley/qml
## ------------- ** Loading the data ** ---------------

# Paths are resolved relative to the directory containing this script.
current_dir = os.path.dirname(os.path.realpath(__file__))
filenames = sorted(glob.glob(current_dir + '/../test/CN_isobutane/*.xyz'))
energies = np.loadtxt(
    current_dir + '/../test/CN_isobutane/prop_kjmol_training.txt',
    usecols=[1])

## ------------- ** Setting up the estimator ** ---------------

# Parameters handed to the estimator for the 'acsf' representation.
acsf_grid = {
    "radial_rs": np.arange(0, 10, 1),
    "angular_rs": np.arange(0.5, 10.5, 1),
    "theta_s": np.arange(0, 5, 1),
}
estimator = ARMP(
    iterations=10,
    representation='acsf',
    representation_params=acsf_grid,
    tensorboard=False)

# Compounds come from the xyz files; the representation is generated from them.
estimator.generate_compounds(filenames)
estimator.set_properties(energies)
estimator.generate_representation()

##  ------------- ** Fitting to the data ** ---------------

# Fit on the sample indices 0..99.
idx = np.arange(100)
estimator.fit(idx)

##  ------------- ** Predicting and scoring ** ---------------

# The score is negated before it is printed.
score = estimator.score(idx)
print("The mean absolute error is %s kJ/mol." % str(-score))

energies_predict = estimator.predict(idx)
コード例 #9
0
ファイル: train.py プロジェクト: SilviaAmAm/qml_scripts
# Open the data set read-only.
# NOTE(review): the path is absolute and machine-specific, and the handle is
# not closed anywhere in the lines visible here.
data = h5py.File("/Volumes/Transcend/data_sets/CN_isopentane/pruned_dft_with_forces/pruned_isopentane_cn_dft.hdf5", "r")

n_samples = 500

# Use the LAST n_samples entries of each dataset.
xyz = np.array(data.get("xyz")[-n_samples:])
# Scale by 2625.50 (presumably Hartree -> kJ/mol - TODO confirm units), then
# reference all energies to the first entry of the full "ene" dataset.
ene = np.array(data.get("ene")[-n_samples:])*2625.50
ene = ene - data.get("ene")[0]*2625.50
zs = np.array(data["zs"][-n_samples:], dtype=np.int32)

# Create the estimator with both regularisation terms set to zero.
acsf_param = {"nRs2": 5, "nRs3": 5, "nTs": 5, "rcut": 5, "acut": 5, "zeta": 220.127, "eta": 30.8065}
estimator = ARMP(iterations=1000, batch_size=512, l1_reg=0.0, l2_reg=0.0, learning_rate=0.001, representation_name='acsf',
                 representation_params=acsf_param, tensorboard=False, store_frequency=50)
estimator.set_properties(ene)
estimator.generate_representation(xyz, zs, method='fortran')
# NOTE(review): `g` is presumably the attribute holding the generated
# representation in the ARMP version this script targets - confirm.
print(estimator.g.shape)

# Split the sample indices with 15% held out. shuffle=False keeps the
# original order, so `random_state` presumably has no effect here.
idx = list(range(n_samples))
idx_train, idx_test = modsel.train_test_split(idx, test_size=0.15, random_state=42, shuffle=False)

print("Starting the fitting...")
estimator.fit(idx_train)

# estimator.save_nn("saved_model")

# Predict for the training samples in two ways: by index into the stored
# data, and from the coordinates / `zs` arrays passed in directly.
pred1 = estimator.predict(idx_train)
pred2 = estimator.predict_from_xyz(xyz[idx_train], zs[idx_train])