# Build a Gaussian-process model, fit it on (eigX, y), score it on the test
# descriptors `eigt` against `Ttest`, and record the fitted `gp.alpha` and
# `gp.K` in the `alpha` / `covmat` lists.
#
# NOTE(review): this fragment was recovered from a whitespace-collapsed
# source. Any enclosing loop/indentation is not visible here and must be
# confirmed. `ttt` is expected to hold a start time set before this point.
# NOTE(review): time.clock() was removed in Python 3.8. It is left untouched
# because swapping the clock would change what is being timed.
gp = GaussianProcess(corr='absolute_exponential',
                     theta0=sp.asarray([theta0]),
                     nugget=1e-3,
                     verbose=True,
                     low_memory=False)

# Fit to data using Maximum Likelihood Estimation of the parameters
gp.fit(eigX, y)
# Single-argument print(...) emits the same text as the old Python 2
# `print a, b` statement and also parses on Python 3.
print("TIMER teach %s" % (time.clock() - ttt,))
ttt = time.clock()

# # Make the prediction on training set
# y_pred, MSE = gp.predict(eigX, eval_MSE=True)
# sigma = sp.sqrt(MSE)
# print('\n training set:')
# print('MAE: %5.2f kcal/mol' % sp.absolute(y_pred-y).mean(axis=0))
# print('RMSE: %5.2f kcal/mol' % sp.square(y_pred-y).mean(axis=0)**.5)

# Make the prediction on test set
y_pred, MSE = gp.predict(eigt, eval_MSE=True)
# `sigma` is not read inside this fragment; kept in case later code uses it.
sigma = sp.sqrt(MSE)
print('\n test set:')
test_residual = y_pred - Ttest.ravel()
print('MAE: %5.2f kcal/mol' % sp.absolute(test_residual).mean(axis=0))
# `**` binds tighter than `%`; the parentheses only make that explicit.
print('RMSE: %5.2f kcal/mol' % (sp.square(test_residual).mean(axis=0) ** .5))
print("TIMER predict %s" % (time.clock() - ttt,))

alpha.append(gp.alpha)
covmat.append(gp.K)
# Spread of the fitted alphas relative to their mean absolute value.
print(r"\alpha STD: %f" % (sp.std(gp.alpha) / sp.mean(sp.absolute(gp.alpha))))
# Reproduces the output of the former `print "\n", "-"*60, "\n"` statement,
# including the separator space Python 2 inserted before the last newline.
print("\n" + "-" * 60 + " \n")

# P = dataset['P'][range(0,split)+range(split+1,5)].flatten()
# X = dataset['X'][P]
# T = dataset['T'][P]
# Compare "true" alphas (from teaching the database plus the target
# molecule) with alphas predicted from a fit on the database alone, then
# append all accumulated results to 'alpha_predictions.txt'.
#
# NOTE(review): this fragment was recovered from a whitespace-collapsed
# source. Any enclosing loop/indentation is not visible here and must be
# confirmed (in particular whether the file dump belongs inside the loop).
# NOTE(review): time.clock() was removed in Python 3.8. It is left untouched
# because swapping the clock would change what is being timed.
alphas = teach_database_plusone(gp, eigX_db, y_db, eigX_t, y_t)
alpha_target.append(alphas)

# --------------------------------------------
# Second time don't include the test set and predict
# --------------------------------------------
ttt = time.clock()
gp.flush_data()
# Fit to data
gp.fit(eigX_db, y_db)
# Single-argument print(...) emits the same text as the old Python 2
# `print a, b` statement and also parses on Python 3.
print("TIMER teach %s" % (time.clock() - ttt,))

beta = sp.dot(gp.inverse, gp.alpha)
y_pred, k = gp.predict(eigX_t, return_k=True)

# --------------------------------------------
# predict the alphas the K-1 * K-1 * k way
# (beta = gp.inverse . gp.alpha; presumably gp.inverse is K^-1 -- confirm)
# --------------------------------------------
alpha_predicted.append(sp.dot(k, beta.flatten()))
energy_target.append(y_t)
energy_error.append(y_pred - y_t)

# check whether the ML itself is doing sensible things
# (two spaces after '=' match what the Python 2 print statement produced)
print("ERROR =  %s" % (energy_error[-1],))
print("ALPHA TRUE vs. PREDICTED: %s %s" % (alphas, alpha_predicted[-1]))

# Build the table before touching the file so that a shape mismatch in
# vstack cannot leave a header line with no data rows behind it.
output_data = sp.vstack((sp.array(alpha_target).flatten(),
                         sp.array(alpha_predicted).flatten(),
                         sp.array(energy_target).flatten(),
                         sp.array(energy_error).flatten()))
with open('alpha_predictions.txt', 'a') as f:
    f.write("n_test_molecules=%d n_databases=%d db_size=%d\n"
            % (Ntest, Ndatabases, Nteach))
    # One row per sample: alpha target, alpha predicted, energy target,
    # energy error.
    sp.savetxt(f, output_data.T)
# Two-level scheme: fit `gp` on the database energies, fit `gp_level2` on
# the resulting `gp.alpha` values, then predict both the energy and the
# alpha of the target `eigX_t`.
#
# NOTE(review): this fragment was recovered from a whitespace-collapsed
# source. Any enclosing loop/indentation is not visible here and must be
# confirmed. `alphas` is expected to be set before this point.
# NOTE(review): time.clock() was removed in Python 3.8. It is left untouched
# because swapping the clock would change what is being timed.
alpha_target.append(alphas)
gp.flush_data()

# --------------------------------------------
# Second time don't include the test set and predict
# --------------------------------------------
ttt = time.clock()
# Fit to data
gp.fit(eigX_db, y_db)
gp_level2.fit(eigX_db, gp.alpha.flatten())
# Single-argument print(...) emits the same text as the old Python 2
# `print a, b` statement and also parses on Python 3.
print("TIMER teach %s" % (time.clock() - ttt,))

y_pred = gp.predict(eigX_t)

# --------------------------------------------
# predict the alphas
# --------------------------------------------
alpha_pred = gp_level2.predict(eigX_t)
alpha_predicted.append(alpha_pred.flatten())
energy_target.append(y_t)
energy_error.append(y_pred - y_t)

# check whether the level 1 ML itself is predicting the property correctly
# (two spaces after '=' match what the Python 2 print statement produced)
print("ERROR =  %s" % (energy_error[-1],))
print("ALPHA TRUE vs. PREDICTED: %s %s" % (alphas, alpha_predicted[-1]))
verbose=True, low_memory=False) # Fit to data using Maximum Likelihood Estimation of the parameters gp.fit(eigX, y) print "TIMER teach", time.clock() - ttt ttt = time.clock() # # Make the prediction on training set # y_pred, MSE = gp.predict(eigX, eval_MSE=True) # sigma = sp.sqrt(MSE) # print('\n training set:') # print('MAE: %5.2f kcal/mol' % sp.absolute(y_pred-y).mean(axis=0)) # print('RMSE: %5.2f kcal/mol' % sp.square(y_pred-y).mean(axis=0)**.5) # Make the prediction on test set y_pred, MSE = gp.predict(eigt, eval_MSE=True) sigma = sp.sqrt(MSE) print('\n test set:') print('MAE: %5.2f kcal/mol' % sp.absolute(y_pred - Ttest.ravel()).mean(axis=0)) print('RMSE: %5.2f kcal/mol' % sp.square(y_pred - Ttest.ravel()).mean(axis=0)**.5) print "TIMER predict", time.clock() - ttt alpha.append(gp.alpha) covmat.append(gp.K) print r"\alpha STD: %f" % (sp.std(gp.alpha) / sp.mean(sp.absolute(gp.alpha))) print "\n", "-" * 60, "\n"
# Two-level scheme: fit `gp` on the database energies, fit `gp_level2` on
# the resulting `gp.alpha` values, then predict both the energy and the
# alpha of the target `eigX_t`.
#
# NOTE(review): this fragment was recovered from a whitespace-collapsed
# source. Any enclosing loop/indentation is not visible here and must be
# confirmed. `alphas` is expected to be set before this point.
# NOTE(review): time.clock() was removed in Python 3.8. It is left untouched
# because swapping the clock would change what is being timed.
alpha_target.append(alphas)
gp.flush_data()

# --------------------------------------------
# Second time don't include the test set and predict
# --------------------------------------------
ttt = time.clock()
# Fit to data
gp.fit(eigX_db, y_db)
gp_level2.fit(eigX_db, gp.alpha.flatten())
# Single-argument print(...) emits the same text as the old Python 2
# `print a, b` statement and also parses on Python 3.
print("TIMER teach %s" % (time.clock() - ttt,))

y_pred = gp.predict(eigX_t)

# --------------------------------------------
# predict the alphas
# --------------------------------------------
alpha_pred = gp_level2.predict(eigX_t)
alpha_predicted.append(alpha_pred.flatten())
energy_target.append(y_t)
energy_error.append(y_pred - y_t)

# check whether the level 1 ML itself is predicting the property correctly
# (two spaces after '=' match what the Python 2 print statement produced)
print("ERROR =  %s" % (energy_error[-1],))
print("ALPHA TRUE vs. PREDICTED: %s %s" % (alphas, alpha_predicted[-1]))