import numpy as np

from qml.kernels import gaussian_kernel


def test_gaussian_kernel():

    np.random.seed(666)

    n_train = 25
    n_test = 20

    # List of dummy representations
    X = np.random.rand(n_train, 1000)
    Xs = np.random.rand(n_test, 1000)

    sigma = 100.0

    Ktest = np.zeros((n_train, n_test))

    for i in range(n_train):
        for j in range(n_test):
            Ktest[i, j] = np.exp(np.sum(np.square(X[i] - Xs[j])) / (-2.0 * sigma**2))

    K = gaussian_kernel(X, Xs, sigma)

    # Compare the two implementations:
    assert np.allclose(K, Ktest), "Error in Gaussian kernel"

    Ksymm = gaussian_kernel(X, X, sigma)

    # Check for symmetry:
    assert np.allclose(Ksymm, Ksymm.T), "Error in Gaussian kernel"
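# A vectorized reference makes the formula being tested explicit. This is a
# sketch, not part of the test suite; it assumes scipy is available and that
# gaussian_kernel computes K[i, j] = exp(-||X[i] - Xs[j]||^2 / (2 * sigma^2)).
from scipy.spatial.distance import cdist


def gaussian_kernel_reference(X, Xs, sigma):
    # Squared Euclidean distances between all rows of X and Xs
    d2 = cdist(X, Xs, metric="sqeuclidean")
    return np.exp(-d2 / (2.0 * sigma**2))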
def do_qml_gaussian_kernel(self):

    # K is also a numpy array; create the kernel matrix
    K = gaussian_kernel(self.x_training, self.x_training, self.sigma)

    # Add a small lambda to the diagonal of the kernel matrix
    K[np.diag_indices_from(K)] += self.lamda

    # Use the built-in Cholesky decomposition to solve for the regression coefficients
    alpha = cho_solve(K, self.y_training)

    # To predict, calculate the kernel matrix between test and training data
    Ks = gaussian_kernel(self.x_test, self.x_training, self.sigma)

    # Make the predictions
    Y_predicted = np.dot(Ks, alpha)

    # Calculate the mean absolute error (MAE):
    self.mae = np.mean(np.abs(Y_predicted - self.y_test))
    self.test_predicted_results = Y_predicted
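# A minimal, class-free sketch of the same fit-and-predict steps, assuming the
# QML API used above (qml.kernels.gaussian_kernel, qml.math.cho_solve);
# krr_fit_predict is a hypothetical helper name, not part of the original code:
import numpy as np
from qml.kernels import gaussian_kernel
from qml.math import cho_solve


def krr_fit_predict(x_train, y_train, x_test, sigma, lamda=1e-8):
    K = gaussian_kernel(x_train, x_train, sigma)
    K[np.diag_indices_from(K)] += lamda           # regularize the diagonal
    alpha = cho_solve(K, y_train)                 # solve (K + lambda*I) alpha = y
    Ks = gaussian_kernel(x_test, x_train, sigma)  # test-vs-training kernel
    return np.dot(Ks, alpha)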
def test_kernels():

    import numpy as np
    from qml.kernels import laplacian_kernel
    from qml.kernels import gaussian_kernel

    n_train = 25
    n_test = 20

    # List of dummy representations
    X = np.random.rand(n_train, 1000)
    Xs = np.random.rand(n_test, 1000)

    sigma = 100.0

    Gtest = np.zeros((n_train, n_test))
    Ltest = np.zeros((n_train, n_test))

    for i in range(n_train):
        for j in range(n_test):
            Gtest[i, j] = np.exp(np.sum(np.square(X[i] - Xs[j])) / (-2.0 * sigma**2))
            Ltest[i, j] = np.exp(np.sum(np.abs(X[i] - Xs[j])) / (-1.0 * sigma))

    G = gaussian_kernel(X, Xs, sigma)
    L = laplacian_kernel(X, Xs, sigma)

    # Compare the two implementations:
    assert np.allclose(G, Gtest), "Error in Gaussian kernel"
    assert np.allclose(L, Ltest), "Error in Laplacian kernel"

    Gsymm = gaussian_kernel(X, X, sigma)
    Lsymm = laplacian_kernel(X, X, sigma)

    # Check for symmetry:
    assert np.allclose(Gsymm, Gsymm.T), "Error in Gaussian kernel"
    assert np.allclose(Lsymm, Lsymm.T), "Error in Laplacian kernel"
# For every compound generate a Coulomb matrix (or BoB)
for mol in compounds:
    mol.generate_coulomb_matrix(size=23, sorting="row-norm")
    # mol.generate_bob(size=23, asize={"O":3, "C":7, "N":3, "H":16, "S":1})

# Make a big 2D array with all the representations
X = np.array([mol.representation for mol in compounds])
# X = np.array([mol.bob for mol in compounds])

# Print all representations
print("Representations:")
print(X)

# Assign the first 1000 molecules to the training set
X_training = X[:1000]
Y_training = energy_pbe0[:1000]

sigma = 4000.0
K = gaussian_kernel(X_training, X_training, sigma)
print("Gaussian kernel:")
print(K)

# Add a small lambda to the diagonal of the kernel matrix
K[np.diag_indices_from(K)] += 1e-8

# Use the built-in Cholesky decomposition to solve
alpha = cho_solve(K, Y_training)
print("Alphas:")
print(alpha)
print(energy_pbe0)

# Assign the first 1000 molecules to the training set
X_training = X[:1000]
Y_training = energy_pbe0[:1000]
# Y_training = energy_delta[:1000]

# Assign the last 1000 molecules to the test set
X_test = X[-1000:]
Y_test = energy_pbe0[-1000:]
# Y_test = energy_delta[-1000:]

# Calculate the Gaussian kernel
sigma = 700.0
K = gaussian_kernel(X_training, X_training, sigma)
print(K)

# Add a small lambda to the diagonal of the kernel matrix
K[np.diag_indices_from(K)] += 1e-8

# Use the built-in Cholesky decomposition to solve
alpha = cho_solve(K, Y_training)
print(alpha)

# Calculate a kernel matrix between test and training data, using the same sigma
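# The excerpt above stops at the comment; a minimal sketch of the implied
# prediction step, reusing the names already defined (not part of the
# original excerpt):
Ks = gaussian_kernel(X_test, X_training, sigma)

# Make the predictions and report the mean absolute error (MAE)
Y_predicted = np.dot(Ks, alpha)
print("MAE:", np.mean(np.abs(Y_predicted - Y_test)))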
Yprime = np.asarray([mol.properties for mol in mols])
Ytest = np.asarray([mol.properties for mol in mols_test])

# np.save("data/krr/trainingFCHL", X)
# np.save("data/krr/testFCHL", X_test)

print("\n -> calculating kernels")
random.seed(667)

# Calculate the kernel matrices and run the cross-validation.
print("\n -> calculating cross validation and predictions")
for j in tqdm(range(len(sigmas))):
    K = gaussian_kernel(X, X, sigmas[j])
    K_test = gaussian_kernel(X, X_test, sigmas[j])

    for train in N:
        test = total - train
        maes = []
        for i in range(nModels):
            split = list(range(total))
            random.shuffle(split)

            training_index = split[:train]
            test_index = split[-test:]

            Y = Yprime[training_index]
            Ys = Yprime[test_index]
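            # The loop body appears truncated here. A plausible continuation
            # under the snippet's own names (an assumption, not the original
            # code): fit KRR on the training split, predict the held-out
            # split from the precomputed full kernel, and record the MAE.
            C = K[np.ix_(training_index, training_index)]
            C[np.diag_indices_from(C)] += 1e-8  # hypothetical regularizer
            alpha = cho_solve(C, Y)
            Yss = np.dot(K[np.ix_(test_index, training_index)], alpha)
            maes.append(np.mean(np.abs(Ys - Yss)))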
def __init__(self, wds, ia1, ia2, coeff=1.0, llambda=1.e-4):
    """
    ia1, ia2 -- atomic indices, starting from 0
    """
    s1 = SLATM(wds, 'out', regexp='', properties='AE', M='slatm',
               local=True, igroup=False, ow=False, nproc=1, istart=0,
               slatm_params={'nbody': 3, 'dgrids': [0.03, 0.03],
                             'sigmas': [0.05, 0.05], 'rcut': 4.8,
                             'rpower2': 6, 'ws': [1., 1., 1.],
                             'rpower3': 3, 'isf': 0, 'kernel': 'g',
                             'intc': 3},
               iY=False)
    fs = s1.fs
    coords = s1.coords
    self.nm = len(fs)  # number of molecules; the last one is the target

    iast2 = s1.nas.cumsum()
    iast1 = np.array([0, ] + list(iast2[:-1]))

    objs = []
    ds = []
    for i, f in enumerate(fs):
        obj = wfn(f)
        obj.get_dm()
        objs.append(obj)
        # Distances to the target molecule; include the target itself
        # (i == nm-1) so that xs[-1] below is the target atom pair
        ds.append(ssd.cdist(coords[i], coords[self.nm - 1]))

    # Specify target atom pairs!!
    # ia1, ia2 = 0, 1
    # coeff = 1.0; llambda = 1e-6
    cia1 = coords[-1][ia1]
    cia2 = coords[-1][ia2]

    xs = []
    ys = []
    nhass = []
    for i, f in enumerate(fs):
        dsi = ds[i]
        jas = np.arange(dsi.shape[0])
        filt1 = (dsi[:, ia1] <= 0.01)
        filt2 = (dsi[:, ia2] <= 0.01)
        if np.any(filt1) and np.any(filt2):
            nhass.append(s1.nhass[i])
            obj = objs[i]
            ja1 = jas[filt1]
            ja2 = jas[filt2]
            p, q, r, s = obj.ibs1[ja1], obj.ibs2[ja1], obj.ibs1[ja2], obj.ibs2[ja2]
            dmij = obj.dm[p:q, r:s].ravel()
            ys.append(dmij)
            iat1 = iast1[i] + ja1
            iat2 = iast1[i] + ja2
            x1 = s1.X[iat1]
            x2 = s1.X[iat2]
            xs.append(np.concatenate((x1, x2), axis=0))

    nprop = len(dmij)
    nt = len(nhass)
    nhass = np.array(nhass)
    tidxs = np.arange(nt)
    nhass_u = np.unique(nhass)
    nu = len(nhass_u)

    xs = np.array(xs)
    ys = np.array(ys)
    xs2 = np.array([xs[-1]])
    ys2 = np.array([ys[-1]])

    for j in range(nu):
        jdxs = tidxs[nhass <= nhass_u[j]]
        xs1 = xs[jdxs, :]
        ys1 = ys[jdxs, :]

        # Set the kernel width from the largest pairwise distance
        ds1 = qd.l2_distance(xs1, xs1)  # ssd.pdist(xs1, metric='euclidean')
        dmax = max(ds1.ravel())
        sigma = coeff * dmax / np.sqrt(2.0 * np.log(2.0))

        K1 = qk.gaussian_kernel(xs1, xs1, sigma)
        assert np.allclose(K1, K1.T), "Error in local Gaussian kernel symmetry"
        K1[np.diag_indices_from(K1)] += llambda
        alpha = np.array([cho_solve(K1, ys1)]).T

        K2 = qk.gaussian_kernel(xs2, xs1, sigma)
        ys2_est = np.dot(K2, alpha)

        error = np.squeeze(ys2_est) - ys2
        mae = np.sum(np.abs(error)) / nprop
        rmse = np.sqrt(np.sum(error**2) / nprop)
        print('%4d %12.8f %12.8f' % (len(xs1), mae, rmse))
from qml.kernels import gaussian_kernel


def compute_kernel_qml(X_i, X_j, sigma=1e3):
    # Thin wrapper around QML's Gaussian kernel with a default kernel width
    return gaussian_kernel(X_i, X_j, sigma)
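# Hypothetical usage of the wrapper above (random data, default sigma);
# shapes are illustrative only:
import numpy as np

X_i = np.random.rand(10, 64)
X_j = np.random.rand(5, 64)
K = compute_kernel_qml(X_i, X_j)  # K has shape (10, 5)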
import numpy as np

from tutorial_data import compounds
from qml.kernels import gaussian_kernel

# For every compound generate a Coulomb matrix or BoB
for mol in compounds:
    mol.generate_coulomb_matrix(size=23, sorting="row-norm")
    # mol.generate_bob(size=23, asize={"O":3, "C":7, "N":3, "H":16, "S":1})

# Make a big 2D array with all the representations
X = np.array([mol.representation for mol in compounds])
# X = np.array([mol.bob for mol in compounds])

# Print all representations
print("Representations:")
print(X)

# Run on only a subset of the first 100 (for speed)
X = X[:100]

# Define the kernel width
sigma = 1000.0

# K is also a numpy array
K = gaussian_kernel(X, X, sigma)

# Print the kernel
print("Gaussian kernel:")
print(K)
Y = np.array([mol.properties for mol in training])
Ys = np.array([mol.properties for mol in test])

# List of representations
mbtypes = get_slatm_mbtypes(np.array([mol.nuclear_charges for mol in mols]))

for i, mol in enumerate(training):
    mol.generate_slatm(mbtypes, local=False, rpower=6)

for i, mol in enumerate(test):
    mol.generate_slatm(mbtypes, local=False, rpower=6)

X = np.array([mol.representation for mol in training])
Xs = np.array([mol.representation for mol in test])

# Kernel width and regularization strength
sigma = 0.1
llambda = 1e-9

# Generate the training kernel and the prediction kernel
K = gaussian_kernel(X, X, sigma)
Ks = gaussian_kernel(X, Xs, sigma)

K[np.diag_indices_from(K)] += llambda
alpha = cho_solve(K, Y)

# Calculate the predictions
Yss = np.dot(Ks.transpose(), alpha)

mae = np.mean(np.abs(Ys - Yss))
print(mae)
exit()