def train_only(n_train=40):
    """Train an energy+force operator model on FILENAME_TRAIN and save it.

    Args:
        n_train: number of molecules to read from the training file.
            Defaults to 40, the previously hard-coded value (generalized
            to a parameter; callers without arguments are unaffected).

    Side effects:
        Writes data/training_alphas.npy, data/training_Q.npy and
        data/training_X.npy.
    """
    SIGMA = 10.0

    # Read training data from file
    X, dX, Q, E, F = get_data_from_file(FILENAME_TRAIN, n=n_train)

    # Center the energies on their mean; labeled print for consistency
    # with train() (was a bare print(offset)).
    offset = E.mean()
    E -= offset
    print("OFFSET: ", offset)

    F = np.concatenate(F)
    Y = np.concatenate((E, F.flatten()))

    print("Kernels ...")
    Kte = get_atomic_local_kernel(X, X, Q, Q, SIGMA)
    Kt = get_atomic_local_gradient_kernel(X, X, dX, Q, Q, SIGMA)

    # Stack energy rows on top of force rows for the combined solve.
    C = np.concatenate((Kte, Kt))

    print("Alphas operator ...")
    alpha = svd_solve(C, Y, rcond=1e-11)

    np.save("data/training_alphas.npy", alpha)
    np.save("data/training_Q.npy", Q)
    np.save("data/training_X.npy", X)
def train(dataname, n_train=100):
    """Fit energy+force KRR alphas for *dataname* and persist the model.

    Reads up to n_train molecules from data/<dataname>-train.npz, subtracts
    the mean energy, solves the stacked kernel system by SVD, and saves the
    offset, kernel width, alphas, charges and representations under data/.
    """
    SIGMA = 10.0
    filename_train = f"data/{dataname}-train.npz"

    # Read training data from file
    X, dX, Q, E, F = get_data_from_file(filename_train, n=n_train)

    # Center energies on their mean so the model fits deviations only.
    offset = E.mean()
    E -= offset
    print("OFFSET: ", offset)

    all_forces = np.concatenate(F)
    targets = np.concatenate((E, all_forces.flatten()))

    print("Generating Kernels ...")
    kernel_energy = get_atomic_local_kernel(X, X, Q, Q, SIGMA)
    kernel_force = get_atomic_local_gradient_kernel(X, X, dX, Q, Q, SIGMA)
    system = np.concatenate((kernel_energy, kernel_force))

    print("Alphas operator ...")
    alpha = svd_solve(system, targets, rcond=1e-11)

    # Persist every piece needed to reload and apply the model later.
    for suffix, payload in (
        ("offset", offset),
        ("sigma", SIGMA),
        ("alphas", alpha),
        ("Q", Q),
        ("X", X),
    ):
        np.save(f"data/{dataname}_{suffix}.npy", payload)
    return
def krr(kernel, properties, rcond=1e-9, solver="cho"):
    """Solve the KRR linear system and return the regression coefficients.

    When solver == "cho" a Cholesky solve with L2 regularization *rcond*
    is used; any other value falls back to an SVD solve with singular-value
    cutoff *rcond*.
    """
    if solver == "cho":
        return cho_solve(kernel, properties, l2reg=rcond)
    return svd_solve(kernel, properties, rcond=rcond)
def training(kernel_te, kernel_t, Y_te, Y_t, sigma=10.0):
    """Stack energy and gradient kernels/labels and solve for alphas by SVD.

    NOTE(review): *sigma* is accepted for interface symmetry with the other
    training helpers but is never used inside this function.
    """
    stacked_kernel = np.concatenate((kernel_te, kernel_t))
    stacked_labels = np.concatenate((Y_te, Y_t.flatten()))
    return svd_solve(stacked_kernel, stacked_labels, rcond=1e-11)
def train(dataname, n_train=100):
    """Train an energy+force model from a CSV directory of representations.

    NOTE(review): this redefines the earlier train() in this module; when the
    file is imported top-to-bottom this definition wins.

    Args:
        dataname: directory passed to csvdir_to_reps; also used as the
            prefix for the saved .npy files under data/.
        n_train: number of randomly selected training molecules.

    Raises:
        SystemExit: if fewer than n_train molecules are available
            (equivalent to the previous bare exit()).
    """
    SIGMA = 10.0

    # Read training data from file
    Xall, dXall, Qall, Eall, Fall = csvdir_to_reps(dataname)

    if len(Eall) < n_train:
        print("Not enough training data for", n_train)
        # exit() raises SystemExit; make that explicit.
        raise SystemExit

    idx = list(range(len(Eall)))
    np.random.shuffle(idx)
    # Renamed from `train`, which shadowed this function's own name.
    train_idx = idx[:n_train]
    print(len(train_idx))

    X = Xall[train_idx]
    dX = dXall[train_idx]
    Q = [Qall[i] for i in train_idx]
    E = Eall[train_idx]
    F = [Fall[i] for i in train_idx]

    # Energies are deliberately not centered here (offset fixed at 0.0),
    # unlike the other train() which subtracts E.mean() — TODO confirm.
    offset = 0.0
    print("OFFSET: ", offset)

    F = np.concatenate(F)
    Y = np.concatenate((E, F.flatten()))

    print("Generating Kernels ...")
    Kte = get_atomic_local_kernel(X, X, Q, Q, SIGMA)
    Kt = get_atomic_local_gradient_kernel(X, X, dX, Q, Q, SIGMA)
    C = np.concatenate((Kte, Kt))

    print("Alphas operator ...")
    alpha = svd_solve(C, Y, rcond=1e-11)

    np.save("data/" + dataname + "_offset.npy", offset)
    np.save("data/" + dataname + "_sigma.npy", SIGMA)
    np.save("data/" + dataname + "_alphas.npy", alpha)
    # Q is a ragged list of charge arrays, hence the explicit pickle flag.
    np.save("data/" + dataname + "_Q.npy", Q, allow_pickle=True)
    np.save("data/" + dataname + "_X.npy", X)
    return
# --- electric-field-gradient kernel training / evaluation fragment ---
# NOTE(review): this chunk references names (X, Xs, df, ef_scaling,
# kernel_args, t_start, D, Ds, llambda, sigmas, DIPOLE_UNIT) defined in an
# enclosing scope not visible here.
dK = get_atomic_local_electric_field_gradient_kernels(
    X, X, df=df, ef_scaling=ef_scaling, **kernel_args)[0]
t_end = time()
# BUGFIX: elapsed time was printed as t_start - t_end (always negative).
print("Elapsed:", t_end - t_start)

t_start = time()
dKs = get_atomic_local_electric_field_gradient_kernels(
    X, Xs, df=df, ef_scaling=ef_scaling, **kernel_args)[0]
t_end = time()
print("Elapsed:", t_end - t_start)

t_start = time()

# Copy so the solve cannot mutate the original dipole data / kernel.
Y = deepcopy(D.flatten())
C = deepcopy(dK.T)

dY = D.flatten()
dYs = Ds.flatten()

alpha = svd_solve(C, Y, rcond=llambda)
np.save("alphas.npy", alpha)
t_end = time()

# Out-of-sample dipole prediction and mean absolute error in Debye.
dYss = np.dot(dKs.T, alpha)
dmae = np.mean(np.abs(dYs - dYss)) / DIPOLE_UNIT

t_elapsed = t_end - t_start
print("%7.2f %20.12f Debye %10.2f s" % (sigmas[0], dmae, t_elapsed))
def train():
    """Build FCHL-ACSF representations for the training set, fit operator
    alphas for energies and forces, save the model arrays, and report
    training-set regression statistics.
    """
    # (An earlier revision shelled out to model_training.py here.)
    data = get_properties("train")

    mols = []
    mols_pred = []

    SIGMA = 2.5  # kernel width; was a CLI argument in an earlier revision

    for name in sorted(data.keys()):
        mol = qml.Compound()
        mol.read_xyz("xyz/" + name + ".xyz")
        # Associate a property (heat of formation) with the object
        mol.properties = data[name][0]
        mols.append(mol)

    shuffle(mols)

    # REPRESENTATIONS
    print("\n -> calculate representations")
    t0 = time()

    reps = []
    rep_grads = []
    force_list = []
    energy_list = []
    charge_list = []

    for mol in mols:
        rep, rep_grad = generate_fchl_acsf(
            mol.nuclear_charges,
            mol.coordinates,
            gradients=True,
            pad=23,
            elements=[1, 6, 7, 8, 16, 17],
        )
        energy_list.append(mol.properties)
        # Force lookup strips the "xyz/" prefix and ".xyz" suffix from mol.name
        # — presumably mol.name is the path passed to read_xyz; confirm.
        force_list.append(data[(mol.name)[4:-4]][1])
        reps.append(rep)
        rep_grads.append(rep_grad)
        charge_list.append(mol.nuclear_charges)

    X_train = np.array(reps)
    F_train = np.array(force_list)
    # Sign flip — stored values appear to be gradients, not forces; confirm.
    F_train *= -1
    E_train = np.array(energy_list)
    dX_train = np.array(rep_grads)
    Q_train = charge_list

    # Center energies on the training mean.
    E_mean = np.mean(E_train)
    E_train -= E_mean

    F_train = np.concatenate(F_train)

    print(time() - t0)
    print("")

    print(" -> calculating Kernels")
    t0 = time()
    Kte = get_atomic_local_kernel(X_train, X_train, Q_train, Q_train, SIGMA)
    Kt = get_atomic_local_gradient_kernel(
        X_train, X_train, dX_train, Q_train, Q_train, SIGMA)

    # Energy rows stacked on force rows; matching stacked target vector.
    C = np.concatenate((Kte, Kt))
    Y = np.concatenate((E_train, F_train.flatten()))
    print(time() - t0)
    print("")

    print("Alphas operator ...")
    t0 = time()
    alpha = svd_solve(C, Y, rcond=1e-12)
    print(time() - t0)
    print("")

    print("save X")
    np.save('X_active_learning.npy', X_train)

    print("save alphas")
    np.save('alphas_active_learning.npy', alpha)

    print("save Q")
    np.save('Q_active_learning.npy', Q_train)

    # Training-set predictions.
    eYt = np.dot(Kte, alpha)
    fYt = np.dot(Kt, alpha)

    slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(
        E_train, eYt)
    print("TRAINING ENERGY MAE = %10.4f slope = %10.4f intercept = %10.4f r^2 = %9.6f" % \
        (np.mean(np.abs(E_train - eYt)), slope, intercept, r_value))

    slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(
        F_train.flatten(), fYt.flatten())
    print("TRAINING FORCE MAE = %10.4f slope = %10.4f intercept = %10.4f r^2 = %9.6f" % \
        (np.mean(np.abs(F_train.flatten() - fYt.flatten())), slope, intercept, r_value))
def train_alphas(reps, dreps, nuclear_charges, E, F, train_idx, parameters):
    """Fit KRR alphas on *train_idx* and report errors on the remaining data.

    Args:
        reps: representations, shape (nmols, natoms, rep_dim) — rank assumed
            from reps.shape[1] usage; confirm.
        dreps: representation gradients aligned with reps.
        nuclear_charges: per-molecule charge sequences.
        E, F: energies and forces for all molecules.
        train_idx: indices of the training molecules; everything else is test.
        parameters: dict with keys "sigma" (kernel width) and
            "llambda" (SVD rcond cutoff).

    Returns:
        The alpha coefficient vector from the SVD solve.
    """
    print(reps.shape)

    # Generalized: derive the index range from the data instead of the
    # hard-coded 4001 that matched only the original dataset.
    all_idx = np.array(list(range(len(reps))))
    test_idx = np.array([i for i in all_idx if i not in train_idx])

    print(train_idx)
    print(test_idx)

    natoms = reps.shape[1]

    # (Dead code removed: force-row index arrays and n_train/n_test were
    # computed but never used.)

    X = reps[train_idx]
    Xs = reps[test_idx]
    dX = dreps[train_idx]
    dXs = dreps[test_idx]
    Q = [nuclear_charges[i] for i in train_idx]
    Qs = [nuclear_charges[i] for i in test_idx]

    sigma = parameters["sigma"]

    # Training kernels, stacked energy-over-force.
    Ke = get_atomic_local_kernel(X, X, Q, Q, sigma)
    Kf = get_atomic_local_gradient_kernel(X, X, dX, Q, Q, sigma)
    C = np.concatenate((Ke, Kf))

    # Test kernels for out-of-sample prediction.
    Kes = get_atomic_local_kernel(X, Xs, Q, Qs, sigma)
    Kfs = get_atomic_local_gradient_kernel(X, Xs, dXs, Q, Qs, sigma)

    Y = np.concatenate((E[train_idx], F[train_idx].flatten()))
    alphas = svd_solve(C, Y, rcond=parameters["llambda"])

    eYs = deepcopy(E[test_idx])
    fYs = deepcopy(F[test_idx]).flatten()
    eYss = np.dot(Kes, alphas)
    fYss = np.dot(Kfs, alphas)

    ermse_test = np.sqrt(np.mean(np.square(eYss - eYs)))
    emae_test = np.mean(np.abs(eYss - eYs))
    frmse_test = np.sqrt(np.mean(np.square(fYss - fYs)))
    fmae_test = np.mean(np.abs(fYss - fYs))

    # Combined energy+force score (energy term down-weighted by 0.01,
    # force term normalized per atom).
    schnet_score = 0.01 * sum(np.square(eYss - eYs))
    schnet_score += sum(np.square(fYss - fYs)) / natoms

    print("TEST %5.2f %.2E %6.4e %10.8f %10.8f %10.8f %10.8f" % \
        (parameters["sigma"], parameters["llambda"], schnet_score,
         emae_test, ermse_test, fmae_test, frmse_test))

    return alphas
def test_fchl_acsf_operator_dft():
    """End-to-end check of operator (energy + force) learning on the DFT
    csv_data set: random 100-molecule train split, SVD solve, and
    regression statistics on both splits.
    """
    SIGMA = 10.0

    Xall, dXall, Qall, Eall, Fall = csvdir_to_reps("csv_data")

    order = list(range(len(Eall)))
    np.random.shuffle(order)
    print(len(order))

    # First 100 shuffled molecules train; the remainder test.
    train = order[:100]
    test = order[100:]
    print("train = ", len(train), " test = ", len(test))

    X = Xall[train]
    dX = dXall[train]
    Q = [Qall[i] for i in train]
    E = Eall[train]
    F = [Fall[i] for i in train]

    Xs = Xall[test]
    dXs = dXall[test]
    Qs = [Qall[i] for i in test]
    Es = Eall[test]
    Fs = [Fall[i] for i in test]

    print("Representations ...")
    F = np.concatenate(F)
    Fs = np.concatenate(Fs)

    print("Kernels ...")
    Kte = get_atomic_local_kernel(X, X, Q, Q, SIGMA)
    Kse = get_atomic_local_kernel(X, Xs, Q, Qs, SIGMA)
    Kt = get_atomic_local_gradient_kernel(X, X, dX, Q, Q, SIGMA)
    Ks = get_atomic_local_gradient_kernel(X, Xs, dXs, Q, Qs, SIGMA)

    C = np.concatenate((Kte, Kt))
    Y = np.concatenate((E, F.flatten()))

    print("Alphas operator ...")
    alpha = svd_solve(C, Y, rcond=1e-11)

    # Predictions on both splits.
    eYt = np.dot(Kte, alpha)
    eYs = np.dot(Kse, alpha)
    fYt = np.dot(Kt, alpha)
    fYs = np.dot(Ks, alpha)

    print(
        "==============================================================================================="
    )
    print(
        "==== OPERATOR, FORCE + ENERGY ==============================================================="
    )
    print(
        "==============================================================================================="
    )

    slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(
        E, eYt)
    print("TRAINING ENERGY MAE = %10.4f slope = %10.4f intercept = %10.4f r^2 = %9.6f" % \
        (np.mean(np.abs(E - eYt)), slope, intercept, r_value))

    slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(
        F.flatten(), fYt.flatten())
    print("TRAINING FORCE MAE = %10.4f slope = %10.4f intercept = %10.4f r^2 = %9.6f" % \
        (np.mean(np.abs(F.flatten() - fYt.flatten())), slope, intercept, r_value))

    slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(
        Es.flatten(), eYs.flatten())
    print("TEST ENERGY MAE = %10.4f slope = %10.4f intercept = %10.4f r^2 = %9.6f" % \
        (np.mean(np.abs(Es - eYs)), slope, intercept, r_value))

    slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(
        Fs.flatten(), fYs.flatten())
    print("TEST FORCE MAE = %10.4f slope = %10.4f intercept = %10.4f r^2 = %9.6f" % \
        (np.mean(np.abs(Fs.flatten() - fYs.flatten())), slope, intercept, r_value))
def test_fchl_acsf_operator_ccsd():
    """End-to-end check of operator (energy + force) learning on the CCSD
    data: 40 training / 20 test molecules from fixed files, energies centered
    on the training mean, SVD solve, regression statistics on both splits.
    """
    SIGMA = 10.0

    X, dX, Q, E, F = get_data_from_file(FILENAME_TRAIN, n=40)
    Xs, dXs, Qs, Es, Fs = get_data_from_file(FILENAME_TEST, n=20)

    # Center both splits on the TRAINING mean only.
    offset = E.mean()
    E -= offset
    Es -= offset

    print("Representations ...")
    F = np.concatenate(F)
    Fs = np.concatenate(Fs)

    print("Kernels ...")
    Kte = get_atomic_local_kernel(X, X, Q, Q, SIGMA)
    Kse = get_atomic_local_kernel(X, Xs, Q, Qs, SIGMA)
    Kt = get_atomic_local_gradient_kernel(X, X, dX, Q, Q, SIGMA)
    Ks = get_atomic_local_gradient_kernel(X, Xs, dXs, Q, Qs, SIGMA)

    C = np.concatenate((Kte, Kt))
    Y = np.concatenate((E, F.flatten()))

    print("Alphas operator ...")
    alpha = svd_solve(C, Y, rcond=1e-11)

    # Predictions on both splits.
    eYt = np.dot(Kte, alpha)
    eYs = np.dot(Kse, alpha)
    fYt = np.dot(Kt, alpha)
    fYs = np.dot(Ks, alpha)

    print(
        "==============================================================================================="
    )
    print(
        "==== OPERATOR, FORCE + ENERGY ==============================================================="
    )
    print(
        "==============================================================================================="
    )

    slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(
        E, eYt)
    print("TRAINING ENERGY MAE = %10.4f slope = %10.4f intercept = %10.4f r^2 = %9.6f" % \
        (np.mean(np.abs(E - eYt)), slope, intercept, r_value))

    slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(
        F.flatten(), fYt.flatten())
    print("TRAINING FORCE MAE = %10.4f slope = %10.4f intercept = %10.4f r^2 = %9.6f" % \
        (np.mean(np.abs(F.flatten() - fYt.flatten())), slope, intercept, r_value))

    slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(
        Es.flatten(), eYs.flatten())
    print("TEST ENERGY MAE = %10.4f slope = %10.4f intercept = %10.4f r^2 = %9.6f" % \
        (np.mean(np.abs(Es - eYs)), slope, intercept, r_value))

    slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(
        Fs.flatten(), fYs.flatten())
    print("TEST FORCE MAE = %10.4f slope = %10.4f intercept = %10.4f r^2 = %9.6f" % \
        (np.mean(np.abs(Fs.flatten() - fYs.flatten())), slope, intercept, r_value))