def generate_fullset_peratom_errors(self, ntkey, tslist):
    """Return per-atom energy errors pooled over several test sets.

    For every selected test set the last row of ``'Eani'`` and the whole of
    ``'Edft'`` are divided by ``'Na'`` (likewise ``'dEani'``/``'dEdft'`` by
    ``'Na2'``), the per-set arrays are concatenated, and the pooled arrays are
    passed to the MAE and RMSE helpers of ``hdt``. Every result is multiplied
    by 1000.

    Args:
        ntkey: Key selecting the network entry in ``self.fdata``.
        tslist: Iterable of test-set keys; when empty/falsy, all test sets
            stored under ``ntkey`` are used.

    Returns:
        dict keyed by ``names[0]`` (energy MAE), ``names[2]`` (energy RMSE),
        ``names[4]`` (delta-energy MAE) and ``names[6]`` (delta-energy RMSE).
    """
    net = self.fdata[ntkey]
    # Materialise the keys once: they are indexed below and iterated several
    # times, which a one-shot iterable would not survive.
    tskeys = list(tslist) if tslist else list(net.keys())
    # Index of the last row of the prediction arrays.
    last = net[tskeys[0]]['Eani'].shape[0] - 1

    def pooled_per_atom(pred_key, ref_key, count_key):
        # Concatenate per-atom predicted / reference values over all sets.
        predicted = np.concatenate(
            [net[ts][pred_key][last, :] / net[ts][count_key] for ts in tskeys])
        reference = np.concatenate(
            [net[ts][ref_key] / net[ts][count_key] for ts in tskeys])
        return predicted, reference

    # Build each pooled array once; MAE and RMSE share the same inputs.
    e_pred, e_ref = pooled_per_atom('Eani', 'Edft', 'Na')
    de_pred, de_ref = pooled_per_atom('dEani', 'dEdft', 'Na2')

    return {
        names[0]: 1000 * hdt.calculatemeanabserror(e_pred, e_ref),
        names[2]: 1000 * hdt.calculaterootmeansqrerror(e_pred, e_ref),
        names[4]: 1000 * hdt.calculatemeanabserror(de_pred, de_ref),
        names[6]: 1000 * hdt.calculaterootmeansqrerror(de_pred, de_ref),
    }
def Ecorrplot(ax1, Eact, Ecmp, mlbl, color, lab=False):
    """Draw a correlation scatter of compared vs. reference values on ``ax1``.

    A black diagonal spanning the range of ``Eact`` is drawn first, followed
    by the ``(Eact, Ecmp)`` points. The scatter legend entry contains the RMSE
    returned by ``gt.calculaterootmeansqrerror``. Both axis limits are set to
    the range of ``Eact``.

    Args:
        ax1: Matplotlib axes to draw on.
        Eact: Reference values; must provide ``.max()`` and ``.min()``.
        Ecmp: Values compared against ``Eact``.
        mlbl: Prefix of the scatter legend entry.
        color: Colour of the scatter markers.
        lab: When true, the diagonal gets the legend label ``'DFT'``.
    """
    lower = Eact.min()
    upper = Eact.max()

    # Only the optional legend label differs between the two diagonal styles.
    diagonal_style = {'color': 'black', 'linewidth': 5}
    if lab:
        diagonal_style['label'] = 'DFT'
    ax1.plot((lower, upper), (lower, upper), **diagonal_style)

    rmse = gt.calculaterootmeansqrerror(Eact, Ecmp)
    ax1.scatter(Eact,
                Ecmp,
                marker=r'o',
                color=color,
                label=mlbl + ' RMSE: ' + "{:.3f}".format(rmse) + ' kcal/mol',
                linewidth=1)

    ax1.set_xlim([lower, upper])
    ax1.set_ylim([lower, upper])

    # Raw strings: "\D" is not a valid escape sequence in a normal literal.
    ax1.set_ylabel(r'$\Delta E_{cmp}$ (kcal/mol)')
    ax1.set_xlabel(r'$\Delta E_{ref}$ (kcal/mol)')
    ax1.legend(bbox_to_anchor=(0.01, 0.99),
               loc=2,
               borderaxespad=0.,
               fontsize=16)
def plot_irc_data(axes, file, rcf, title, ntwl, cnstfile, saefile, dir, idx):
    """Plot a reference energy profile and the profile of each listed network.

    The reference data is read with ``hdt.readncdat`` and shifted so that its
    first point is zero. Each network in ``ntwl`` is then evaluated on the same
    geometries, shifted the same way and drawn in its own colour.

    Args:
        axes: Matplotlib axes to draw on.
        file: Path passed to ``hdt.readncdat``.
        rcf: Path passed to ``np.load``; column 1 of its ``'x'`` entry is
            used as the x axis.
        title: Axes title.
        ntwl: Sequence of ``(subdirectory, label)`` pairs, one per network.
        cnstfile: Constants file name, appended to ``dir``.
        saefile: Self-energy file name, appended to ``dir``.
        dir: Directory prefix for ``cnstfile`` and ``saefile``.
        idx: The title is green when ``idx < 6`` and red otherwise.

    Returns:
        Tuple ``(terr, derr, berr)`` of arrays with one entry per network:
        profile RMSE, absolute difference of the |first - last| energy gap,
        and absolute difference of the profile maxima.
    """
    xyz, typ, Eact = hdt.readncdat(file, np.float32)
    # Read the coordinate column once instead of re-indexing per curve.
    rc_x = np.load(rcf)['x'][:, 1]

    # Shift reference to its first point.
    Eact = hdt.hatokcal * (Eact - Eact[0])
    axes.plot(rc_x, Eact, color='black', linewidth=3)

    n_nets = len(ntwl)
    color = cm.rainbow(np.linspace(0, 1, n_nets))
    terr = np.zeros(n_nets)
    derr = np.zeros(n_nets)
    berr = np.zeros(n_nets)

    for i, (nt, c) in enumerate(zip(ntwl, color)):
        # NOTE(review): the network directory uses the module-level `rcdir`,
        # not the `dir` argument — confirm this is intended.
        ncr = pync.conformers(dir + cnstfile, dir + saefile,
                              rcdir + nt[0] + 'networks/', 0)
        ncr.setConformers(confs=xyz, types=list(typ))

        # Network energies, shifted to their first point.
        E1 = ncr.energy()
        E1 = hdt.hatokcal * (E1 - E1[0])

        errn = hdt.calculaterootmeansqrerror(E1, Eact)
        terr[i] = errn
        derr[i] = np.abs(
            np.abs(E1[0] - E1[-1]) - np.abs(Eact[0] - Eact[-1]))
        berr[i] = np.abs(E1.max() - Eact.max())

        # '--' only: the colour comes from `color=c`; the former 'r--' format
        # string defined the colour a second time.
        axes.plot(rc_x,
                  E1,
                  '--',
                  color=c,
                  label="[" + nt[1] + "]: " + "{:.2f}".format(errn),
                  linewidth=2)

    axes.set_xlim([rc_x.min(), rc_x.max()])
    axes.legend(loc="upper left", fontsize=12)

    title_color = 'green' if idx < 6 else 'red'
    axes.set_title(title, color=title_color, fontdict={'weight': 'bold'})
    return terr, derr, berr
# Scale the third comparison series by gt.hatokcal.
Eotr3 = gt.hatokcal * Eotr3

# Apply setmaxE with the same threshold to every series. Each call is keyed on
# the not-yet-filtered Eact, so Eact itself has to be filtered last.
# NOTE(review): setmaxE is defined elsewhere; presumably it drops entries whose
# reference energy exceeds Emax — confirm.
Emax = 300.0
Ecmp1 = setmaxE(Eact, Ecmp1, Emax)
Ecmp2 = setmaxE(Eact, Ecmp2, Emax)
Ecmp3 = setmaxE(Eact, Ecmp3, Emax)
Ecmp4 = setmaxE(Eact, Ecmp4, Emax)
Ecmp5 = setmaxE(Eact, Ecmp5, Emax)
Eotr1 = setmaxE(Eact, Eotr1, Emax)
Eotr2 = setmaxE(Eact, Eotr2, Emax)
Eotr3 = setmaxE(Eact, Eotr3, Emax)
Eact = setmaxE(Eact, Eact, Emax)
print('Act count: ' + str(Eact.shape[0]))

# RMSE of every series against the reference: rmse1-3 are the Eotr series,
# rmse4-8 the Ecmp series.
rmse1 = gt.calculaterootmeansqrerror(Eact, Eotr1)
rmse2 = gt.calculaterootmeansqrerror(Eact, Eotr2)
rmse3 = gt.calculaterootmeansqrerror(Eact, Eotr3)
rmse4 = gt.calculaterootmeansqrerror(Eact, Ecmp1)
rmse5 = gt.calculaterootmeansqrerror(Eact, Ecmp2)
rmse6 = gt.calculaterootmeansqrerror(Eact, Ecmp3)
rmse7 = gt.calculaterootmeansqrerror(Eact, Ecmp4)
rmse8 = gt.calculaterootmeansqrerror(Eact, Ecmp5)

#plt.scatter(IDX, Eact, marker='o' , color='black', linewidth=3)

# Spearman rank correlation of each method against the reference.
print("Spearman corr. DFTB: " + "{:.7f}".format(st.spearmanr(Eotr1, Eact)[0]))
print("Spearman corr. PM6: " + "{:.7f}".format(st.spearmanr(Eotr2, Eact)[0]))
print("Spearman corr. AM1: " + "{:.7f}".format(st.spearmanr(Eotr3, Eact)[0]))
print("Spearman corr. ANI-1: " + "{:.7f}".format(st.spearmanr(Ecmp1, Eact)[0]))
# Location of the network and its parameter files (paths relative to rcdir).
rcdir = '/home/jujuman/Research/ANI-DATASET/RXN1_TNET/training/rxn1to6/ani_benz_rxn_ntwk/'
cnstfile = '../../rHCNO-4.6A_16-3.1A_a4-8.params'
saefile = '../../sae_6-31gd.dat'
ncr = pync.conformers(rcdir + cnstfile, rcdir + saefile, rcdir + 'networks/', 1)

# Set the conformers in NeuroChem
ncr.setConformers(confs=xyz, types=list(typ))

# Compute Energies of Conformations
E1 = ncr.energy()

# Reverse the order of both energy series (no shift is applied here; the
# shift to the first point happens in the plot calls below).
E1 = E1[0:][::-1]
Ea = Ea[0:][::-1]

#x1 = np.linalg.norm(xyz[:,9,:] - xyz[0,9,:],axis=1)
#x2 = np.linalg.norm(xyz2[:,9,:] - xyz[0,9,:],axis=1)
#print(x2)

# RMSE between the two unshifted series after scaling by hdt.hatokcal.
print(hdt.calculaterootmeansqrerror(hdt.hatokcal * E1, hdt.hatokcal * Ea))

# Both curves are shifted to their own first point before plotting.
plt.plot(Rc['x'][:, 1], hdt.hatokcal * (E1 - E1[0]), color='blue', linewidth=3)
# NOTE(review): 'r--' and color='black' both specify a colour; the keyword
# argument is expected to win — confirm against the Matplotlib version in use.
plt.plot(Rc['x'][:, 1], hdt.hatokcal * (Ea - Ea[0]), 'r--', color='black', linewidth=3)
plt.show()
def graphEdiffDelta2D(ax, title, data1, data2, Na, min, max):
    """Show the absolute difference of two pairwise-difference maps on ``ax``.

    ``gt.calculateelementdiff2D`` is evaluated for both data sets; the RMSE of
    the returned difference vectors is printed per atom and in total. The
    absolute difference of the two ``z`` vectors is arranged into a ``C x C``
    matrix (``C = data1.shape[0]``), transposed, passed through ``flipmat``
    and displayed with one triangle painted white.

    Args:
        ax: Matplotlib axes to draw on.
        title: Axes title.
        data1: First data set; ``data1.shape[0]`` fixes the matrix size.
        data2: Second data set.
        Na: Number of atoms used to normalise the printed RMSE.
        min: Lower colour limit (name shadows the builtin; kept for callers).
        max: Upper colour limit (name shadows the builtin; kept for callers).

    Returns:
        The image object returned by ``ax.matshow``.
    """
    x, y, z, d = gt.calculateelementdiff2D(data1)
    _, _, z2, d2 = gt.calculateelementdiff2D(data2)

    RMSE = gt.calculaterootmeansqrerror(d, d2) / float(Na)
    print('Number of atoms: ' + str(Na))
    print('RMSE: ' + str(RMSE) + ' kcal/mol/atom')
    print('RMSE: ' + str(float(Na) * RMSE) + ' kcal/mol')

    z = np.abs(z - z2)
    C = data1.shape[0]

    # Zero-initialise: np.ndarray(shape=...) returns uninitialised memory, so
    # any cell the loop below does not visit would hold garbage.
    mat = np.zeros((C, C), dtype=float)
    for i in x:
        I = int(i)
        for j in y:
            J = int(j)
            mat[J, I] = z[J + I * C]

    mat = flipmat(np.transpose(mat), C)

    # Show mat
    im = ax.matshow(mat, vmin=min, vmax=max)
    th = ax.set_title(title, fontsize=16)
    th.set_position([0.5, 1.005])

    # Re-render with the jet colormap and paint one triangle white.
    cmap = plt.cm.jet
    norm = plt.Normalize(min, max)
    rgba = cmap(norm(mat))
    for i in range(C):
        rgba[range(i + 1, C), C - i - 1, :3] = 1, 1, 1
    ax.imshow(rgba, interpolation='nearest')

    # Plot center line
    # NOTE(review): the second y value uses x.max(), not y.max() — confirm.
    ax.plot([x.max() + 1 - 0.5, x.min() - 1 + 0.5],
            [y.min() - 0.5, x.max() + 0.5],
            '--',
            color='red',
            linewidth=4,
            alpha=0.8)

    # Set Limits
    ax.set_xlim([-0.06 * x.max(), x.max() + 0.06 * x.max()])
    ax.set_ylim([-0.06 * y.max(), y.max() + 0.06 * y.max()])

    # Hide all decorations so that only the matrix remains visible.
    for side in ("top", "bottom", "left", "right"):
        ax.spines[side].set_visible(False)
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
    return im
def plot_corr_dist(Xa, Xp, inset=True, figsize=(13, 10)):
    """Show a log-scaled 2D correlation histogram of ``Xp`` against ``Xa``.

    The figure contains the diagonal, the 2D histogram with a colour bar, a
    text box with the MAE and RMSE from ``hdn``, an optional zoomed inset, and
    a small histogram of ``Xa - Xp``. The global tick label sizes in
    ``mpl.rcParams`` are changed as a side effect, and ``plt.show()`` is
    called at the end.

    Args:
        Xa: Reference values (x axis); also defines the plotted range.
        Xp: Predicted values (y axis).
        inset: When true, add a zoomed inset covering 1/6 of the range.
        figsize: Figure size passed to ``plt.subplots``.
    """
    Fmx = Xa.max()
    Fmn = Xa.min()
    span = Fmx - Fmn

    label_size = 14
    mpl.rcParams['xtick.labelsize'] = label_size
    mpl.rcParams['ytick.labelsize'] = label_size

    fig, ax = plt.subplots(figsize=figsize)

    # Plot ground truth line
    ax.plot([Fmn, Fmx], [Fmn, Fmx], '--', c='r', linewidth=3)

    # Set labels
    ax.set_xlabel('$Q_{dft}$' + r' $(e \times {10}^{-3})$', fontsize=22)
    ax.set_ylabel('$Q_{ani}$' + r' $(e \times {10}^{-3})$', fontsize=22)

    cmap = mpl.cm.viridis

    # Plot 2d Histogram
    bins = ax.hist2d(Xa,
                     Xp,
                     bins=200,
                     norm=LogNorm(),
                     range=[[Fmn, Fmx], [Fmn, Fmx]],
                     cmap=cmap)

    # Build color bar from the histogram image (last element of the result).
    cb1 = fig.colorbar(bins[-1], cmap=cmap)
    cb1.set_label('Count', fontsize=16)

    # Annotate with errors
    PMAE = hdn.calculatemeanabserror(Xa, Xp)
    PRMS = hdn.calculaterootmeansqrerror(Xa, Xp)
    ax.text(0.75 * span + Fmn,
            0.43 * span + Fmn,
            'MAE=' + "{:.1f}".format(PMAE) + '\nRMSE=' + "{:.1f}".format(PRMS),
            fontsize=20,
            bbox={
                'facecolor': 'white',
                'alpha': 0.5,
                'pad': 5
            })

    if inset:
        axins = zoomed_inset_axes(ax, 2.2, loc=2)  # magnification 2.2
        sz = 6  # the inset spans 1/sz of the full range
        lo, hi = Fmn / sz, Fmx / sz
        axins.hist2d(Xa,
                     Xp,
                     bins=50,
                     range=[[lo, hi], [lo, hi]],
                     norm=LogNorm(),
                     cmap=cmap)
        axins.plot([Fmn, Fmx], [Fmn, Fmx], '--', c='r', linewidth=3)

        # sub region of the original image
        axins.set_xlim(lo, hi)
        axins.set_ylim(lo, hi)
        axins.yaxis.tick_right()

        plt.xticks(visible=True)
        plt.yticks(visible=True)

        mark_inset(ax, axins, loc1=1, loc2=3, fc="none", ec="0.5")

    # Distribution of the differences within four standard deviations.
    Ferr = Xa - Xp
    std = np.std(Ferr)
    men = np.mean(Ferr)
    axh = plt.axes([.49, .14, .235, .235])
    # `density` replaces the `normed` keyword, which Matplotlib has removed.
    axh.hist(Ferr,
             bins=75,
             range=[men - 4 * std, men + 4 * std],
             density=True)
    axh.set_title('Difference distribution')

    plt.show()
def produce_scan(ax, title, xlabel, cnstfile, saefile, nnfdir, dtdir, dt1, dt2,
                 dt3, smin, smax, iscale, ishift):
    """Plot a scan computed by the network against three stored energy series.

    Geometries and energies are read from ``dtdir + dt1``; two further energy
    series are read from ``dtdir + dt2`` and ``dtdir + dt3``. The network is
    evaluated on the geometries of the first file. All four series are scaled
    by ``gt.hatokcal``, cut to ``[smin:smax]`` and shifted so that their own
    minimum is zero. RMSE (legend) and Spearman correlation (printed) are
    computed against the first series.

    Args:
        ax: Matplotlib axes to draw on.
        title: Axes title.
        xlabel: Label of the x axis.
        cnstfile: Constants file passed to ``pync.conformers``.
        saefile: Self-energy file passed to ``pync.conformers``.
        nnfdir: Network directory passed to ``pync.conformers``.
        dtdir: Directory prefix of the three data files.
        dt1: File with geometries and the series labelled ``'DFT'``.
        dt2: File with the series labelled ``'DFTB'``.
        dt3: File with the series labelled ``'PM6'``.
        smin: Start index of the plotted slice.
        smax: Stop index of the plotted slice.
        iscale: Scale applied to the point index to obtain the x value.
        ishift: Offset added to the scaled point index.
    """
    xyz, typ, Eact = gt.readncdat(dtdir + dt1, np.float32)
    # Only the energies of the two comparison files are used.
    _, _, Eact2 = gt.readncdat(dtdir + dt2)
    _, _, Eact3 = gt.readncdat(dtdir + dt3)

    # Construct pyNeuroChem classes
    nc1 = pync.conformers(cnstfile, saefile, nnfdir, 0)

    # Set the conformers in NeuroChem
    nc1.setConformers(confs=xyz, types=list(typ))

    # Print some data from the NeuroChem
    print('1) Number of Atoms Loaded: ' + str(nc1.getNumAtoms()))
    print('1) Number of Confs Loaded: ' + str(nc1.getNumConfs()))

    # Compute energies of the conformations (the reported time includes the
    # print of the raw energies).
    print('Computing energies 1...')
    _t1b = tm.time()
    Ecmp1 = nc1.energy()
    print(Ecmp1)
    print('Computation complete 1. Time: ' + "{:.4f}".format(
        (tm.time() - _t1b) * 1000.0) + 'ms')

    n = smin
    m = smax

    Ecmp1 = gt.hatokcal * Ecmp1
    Eact = gt.hatokcal * Eact
    Eact2 = gt.hatokcal * Eact2
    Eact3 = gt.hatokcal * Eact3

    IDX = np.arange(0, Eact.shape[0], 1, dtype=float) * iscale + ishift

    IDX = IDX[n:m]
    Eact = Eact[n:m]
    Eact2 = Eact2[n:m]
    Eact3 = Eact3[n:m]
    Ecmp1 = Ecmp1[n:m]

    # Shift every series so that its own minimum is zero.
    Ecmp1 = Ecmp1 - Ecmp1.min()
    Eact = Eact - Eact.min()
    Eact2 = Eact2 - Eact2.min()
    Eact3 = Eact3 - Eact3.min()

    rmse1 = gt.calculaterootmeansqrerror(Eact, Ecmp1)
    rmse3 = gt.calculaterootmeansqrerror(Eact, Eact2)
    rmse4 = gt.calculaterootmeansqrerror(Eact, Eact3)

    print("Spearman corr. 1: " + "{:.3f}".format(st.spearmanr(Ecmp1, Eact)[0]))
    print("Spearman corr. 2: " + "{:.3f}".format(st.spearmanr(Eact2, Eact)[0]))
    print("Spearman corr. 3: " + "{:.3f}".format(st.spearmanr(Eact3, Eact)[0]))

    ax.plot(IDX, Eact, '-', marker=r'o', color='black', label='DFT',
            linewidth=2, markersize=7)
    ax.plot(IDX, Ecmp1, ':', marker=r'D', color='red',
            label='ANI-1 RMSE: ' + gt.to_precision(rmse1, 2) + ' kcal/mol',
            linewidth=2, markersize=5)
    ax.plot(IDX, Eact2, ':', marker=r'v', color='blue',
            label='DFTB RMSE: ' + gt.to_precision(rmse3, 2) + ' kcal/mol',
            linewidth=2, markersize=5)
    ax.plot(IDX, Eact3, ':', marker=r'*', color='orange',
            label='PM6 RMSE: ' + gt.to_precision(rmse4, 2) + ' kcal/mol',
            linewidth=2, markersize=7)

    th = ax.set_title(title, fontsize=16)
    th.set_position([0.5, 1.005])

    # Set Limits
    ax.set_xlim([IDX.min(), IDX.max()])
    ax.set_ylim([Eact.min() - 1.0, Eact.max() + 1.0])

    # Raw string: "\D" is not a valid escape sequence in a normal literal.
    ax.set_ylabel(r'$\Delta$E calculated (kcal/mol)')
    ax.set_xlabel(xlabel)
    ax.legend(bbox_to_anchor=(0.2, 0.98), loc=2, borderaxespad=0., fontsize=14)
# Module-level accumulators that plot_irc appends to for every processed file.
Ec2 = []
Ea = []
rmp1 = []
bar1 = []
rmp2 = []
bar2 = []

# One subplot per file; axarr is indexed as (row, column) with X columns.
for i, f in enumerate(files):
    plot_irc(axarr[int(np.floor(i / X)), i % X], i, d, f)
plt.show()

bar1 = np.array(bar1)  # Barrier 1
bar2 = np.array(bar2)  # Barrier 2
rmp1 = np.array(rmp1)  # Reactant product 1
rmp2 = np.array(rmp2)  # Reactant product 2

# Pool the per-file energy arrays for the overall RMSE.
Ec1 = np.concatenate(Ec1)
Ec2 = np.concatenate(Ec2)
Ea = np.concatenate(Ea)

#plt.suptitle(str(len(files)) + " Diels-Alder reactions (x axis=$R_c$;y-axis=relative E [kcal/mol])\n"+cts+"\n"+cds+"\n"+cbs,fontsize=14,fontweight='bold',y=0.99)

# Mean absolute errors over all files (sum / size), then pooled RMSE.
print('Barrier - ANI retrain:', bar1.sum() / bar1.size, 'Original ANI:', bar2.sum() / bar2.size)
print('Reac/prod - ANI retrain:', rmp1.sum() / rmp1.size, 'Original ANI:', rmp2.sum() / rmp2.size)
print('IRC RMSE - ANI Retrain:', hdt.calculaterootmeansqrerror(Ec1, Ea), 'Original ANI:', hdt.calculaterootmeansqrerror(Ec2, Ea))
# Print every reference / computed energy pair.
for j, k in zip(Eact, Ecmp1):
    print(' ', j, ':', k)

print("Time 1: " + "{:.4f}".format(t1 / 1000.0) + 's')

Ecmp1 = gt.hatokcal * Ecmp1
Eact = gt.hatokcal * Eact

# Filter both series with setmaxE; Eact is filtered last because the first
# call is keyed on the unfiltered Eact.
Emax = 100.0
Ecmp1 = setmaxE(Eact, Ecmp1, Emax)
Eact = setmaxE(Eact, Eact, Emax)
print('NMOL: ', Ecmp1.shape[0])

rmse2 = gt.calculaterootmeansqrerror(Eact, Ecmp1)
mx = Eact.max()
mn = Eact.min()

# NOTE(review): the commented lines below are leftovers of an unresolved merge
# conflict; both sides are disabled.
#<<<<<<< HEAD
#plt.scatter(IDX, Eact, marker='o' , color='black', linewidth=3)
#=======
#print ( "Spearman corr. DFTB: " + "{:.3f}".format(st.spearmanr(Eotr,Eact)[0]) )
#>>>>>>> 1ec96245cdd1c6d1647ffeed1a6bec3a4b7e4bb4
print("Spearman corr. TGM 08: " + "{:.3f}".format(st.spearmanr(Ecmp1, Eact)[0]))

# Linear regression of the computed energies on the reference energies.
slope2, intercept2, r_value2, p_value2, std_err2 = st.linregress(Eact, Ecmp1)
# Show the regressor's parameters.
pms = nnr.get_params()
print(pms)

# Count the entries of all arrays in nnr.coefs_.
params = []
for p in nnr.coefs_:
    params.append(p.flatten())
Np = np.concatenate(params).size
print(Np)

# Errors on the training data, after mapping predictions and targets back
# through the scaler.
print('Predicting...')
P = nnr.predict(X_train)
P = scaler.inverse_transform(P)
A = scaler.inverse_transform(y_train.flatten())
print(hdt.calculaterootmeansqrerror(P, A))
print(hdt.calculatemeanabserror(P, A))

#plt.plot(A,A, color='black')
#plt.scatter(P,A,color='blue')
#plt.show()

# Same evaluation on the test data, plus the r^2 score.
print('Predicting...')
P = nnr.predict(X_test)
P = scaler.inverse_transform(P)
A = scaler.inverse_transform(y_test.flatten())
print('RMSE:', hdt.calculaterootmeansqrerror(P, A))
print('MAE: ', hdt.calculatemeanabserror(P, A))
print('r^2:', metrics.r2_score(A, P, sample_weight=None, multioutput=None))
# Reference scan energies with the value from hdt.compute_sae subtracted.
sae = hdt.compute_sae(saefile, scan[1])
serg = scan[2] - sae

# Set the conformers in NeuroChem
nc.setConformers(confs=scan[0], types=list(scan[1]))
nc2.setConformers(confs=scan[0], types=list(scan[1]))

# x axis: 0.6 plus 0.05 per scan point.
x = 0.05 * np.array(range(serg.shape[0]), dtype=np.float64) + 0.6
print(len(x))

# Evaluate hdt.buckingham_pot with the stored parameters.
popt = np.load('mp_ani_params_test.npz')['param']
fsEc = hdt.buckingham_pot(sdat, *popt)
#fsEc = hdt.src_pot(sdat)

# First network plus the fitted potential vs. the second network alone.
aerg = nc.energy() + fsEc - sae
a2erg = nc2.energy() - sae

# NOTE(review): frmse is computed but not used in the visible code.
frmse = hdt.calculaterootmeansqrerror(serg, fsEc)

plt.plot(x, serg, color='black', label='QM')
plt.plot(x, fsEc, color='red', label='SRC')
plt.plot(x, aerg, color='green', label='Corrected')
plt.plot(x, a2erg, color='blue', label='Original')
plt.xlim(0, 5)
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.show()
# Energies of the currently loaded network, scaled by hdn.hatokcal.
ani2 = hdn.hatokcal * nc.energy()

# Load a third network from anipath and evaluate it on the same geometries.
anipath = '/home/jujuman/Research/GDB-11-wB97X-6-31gd/ANI-c08e-ntwk_newtrain/'
cnstfile = anipath + '/rHCNO-4.6A_16-3.1A_a4-8.params'
saefile = anipath + '/sae_6-31gd.dat'
nnfdir = anipath + '/networks/'

# Construct pyNeuroChem class
nc = pync.conformers(cnstfile, saefile, nnfdir, 0)

# Set the conformers in NeuroChem
nc.setConformers(confs=xyz, types=dataf[2])
ani3 = hdn.hatokcal * nc.energy()

# RMSEs are taken on the unshifted energies; the shift to each series' own
# minimum below only affects the plots.
rmse1 = hdn.calculaterootmeansqrerror(ani1, eng)
rmse2 = hdn.calculaterootmeansqrerror(ani2, eng)
rmse3 = hdn.calculaterootmeansqrerror(ani3, eng)

# x axis: displacement of atom index 3 from its position in the first frame.
xv = xyz[:, 3, :] - xyz[0, 3, :]
x = np.linalg.norm(xv, axis=1)

eng = eng - eng.min()
ani1 = ani1 - ani1.min()
ani2 = ani2 - ani2.min()
ani3 = ani3 - ani3.min()

f, axarr = plt.subplots(2, 2)

# NOTE(review): 'r-' and color='black' both specify a colour; the keyword
# argument is expected to win — confirm against the Matplotlib version in use.
axarr[0, 0].plot(x, eng, 'r-', color='black', label='DFT', linewidth=3)
axarr[0, 1].plot(x, eng, 'r-', color='black', label='DFT', linewidth=3)
'/home/jujuman/Dropbox/ChemSciencePaper.AER/JustinsDocuments/ACS_april_2017/DipeptidePhiPsi/data.xyz', xyz, list(data[1])) data2 = hdt.readncdat( '/home/jujuman/Dropbox/ChemSciencePaper.AER/JustinsDocuments/ACS_april_2017/DipeptidePhiPsi/data.dat', type=np.float32) Eact = np.array(Eact) Edft = data2[2] z = np.array(hdt.hatokcal * (Eact - Eact.min()), dtype=np.float32).reshape(x.shape[0], x.shape[0]) z2 = np.array(hdt.hatokcal * (Edft - Edft.min()), dtype=np.float32).reshape(x.shape[0], x.shape[0]) rmse = hdt.calculaterootmeansqrerror(z, z2) print('RMSE: ', rmse) Spline1 = scipy.interpolate.RectBivariateSpline(x, x, z) Spline2 = scipy.interpolate.RectBivariateSpline(x, x, z2) Spline3 = scipy.interpolate.RectBivariateSpline(x, x, abs(z - z2)) XY_New = np.linspace(-180, 180, 200) f, ax = plt.subplots(nrows=1, ncols=3, sharex=True, sharey=True) f.set_size_inches(12, 6) maxi = np.concatenate([z, z2]).max() mini = np.concatenate([z, z2]).min() font = {'family': 'Bitstream Vera Sans', 'weight': 'normal', 'size': 18}
def plot_irc_data(axes, file, title, ntwl, cnstfile, saefile, dir, trained):
    """Plot a reference energy profile and the profile of each listed network.

    The profile is read with ``pyg.read_irc``; column 1 of the returned
    coordinate array, reversed, is used as the x axis. Reference and network
    energies are shifted so that their last point is zero.

    Args:
        axes: Matplotlib axes to draw on.
        file: Path passed to ``pyg.read_irc``.
        title: Axes title.
        ntwl: Sequence of network descriptors; element 0 of each is a
            subdirectory name.
        cnstfile: Constants file name, appended to ``dir + nt[0]``.
        saefile: Self-energy file name, appended to ``dir + nt[0]``.
        dir: Directory prefix for ``cnstfile`` and ``saefile``.
        trained: The title is green when true and red otherwise.

    Returns:
        Tuple ``(terr, derr, berr)`` of arrays with one entry per network:
        profile RMSE, absolute difference of the |first - last| energy gap,
        and absolute difference of the profile maxima.
    """
    Eact, xyz, typ, Rc = pyg.read_irc(file)
    Rc = Rc[:, 1][::-1]

    print(Eact.shape, Rc.shape, xyz.shape)

    # Shift reference to its last point.
    Eact = hdt.hatokcal * (Eact - Eact[-1])
    axes.scatter(Rc, Eact, color='black', linewidth=3)

    n_nets = len(ntwl)
    color = cm.rainbow(np.linspace(0, 1, n_nets))
    terr = np.zeros(n_nets)
    derr = np.zeros(n_nets)
    berr = np.zeros(n_nets)

    for i, (nt, c) in enumerate(zip(ntwl, color)):
        # NOTE(review): the network directory uses the module-level `rcdir`,
        # not the `dir` argument — confirm this is intended.
        ncr = pync.conformers(dir + nt[0] + cnstfile, dir + nt[0] + saefile,
                              rcdir + nt[0] + 'networks/', 0, True)
        ncr.setConformers(confs=xyz, types=list(typ))

        # Network energies, shifted to their last point.
        E1 = ncr.energy()
        E1 = hdt.hatokcal * (E1 - E1[-1])

        terr[i] = hdt.calculaterootmeansqrerror(E1, Eact)
        derr[i] = np.abs(
            np.abs(E1[0] - E1[-1]) - np.abs(Eact[0] - Eact[-1]))
        berr[i] = np.abs(E1.max() - Eact.max())

        # '--' only: the colour comes from `color=c`; the former 'r--' format
        # string defined the colour a second time.
        axes.plot(Rc,
                  E1,
                  '--',
                  color=c,
                  label="[" + str(i) + "]: " + "{:.1f}".format(berr[i]),
                  linewidth=2)

    axes.legend(loc="upper left", fontsize=7)

    title_color = 'green' if trained else 'red'
    axes.set_title(title,
                   color=title_color,
                   fontdict={'weight': 'bold'},
                   x=0.83,
                   y=0.70)
    return terr, derr, berr
def generate_rmserror(self, ntkey, tskey, prop1, prop2):
    """Return the RMSE between the last row of ``prop1`` and ``prop2``.

    Args:
        ntkey: Key selecting the network entry in ``self.fdata``.
        tskey: Key selecting the test set within that network entry.
        prop1: Name of the two-dimensional prediction array; only its last
            row is compared.
        prop2: Name of the reference array.

    Returns:
        Whatever ``hdt.calculaterootmeansqrerror`` returns for the two arrays.
    """
    last_row = self.fdata[ntkey][tskey][prop1].shape[0] - 1
    predicted = self.fdata[ntkey][tskey][prop1][last_row, :]
    reference = self.fdata[ntkey][tskey][prop2]
    return hdt.calculaterootmeansqrerror(predicted, reference)
def plot_bar_propsbynet(self,
                        props,
                        dsets,
                        ntwks=(),
                        fontsize=14,
                        bbox_to_anchor=(1.0, 1.1),
                        figsize=(15.0, 12.0),
                        ncol=1,
                        errortype='MAE'):
    """Show one bar chart per property with one bar group per data set.

    Within a group there is one bar per network. The bar height is the error
    of row 5 of the prediction array against the reference; the error bar is
    the standard deviation of the row-wise errors. The global tick label
    sizes in ``mpl.rcParams`` are changed as a side effect and ``plt.show()``
    is called at the end.

    Args:
        props: Sequence of property descriptors indexed as ``(title, y label,
            prediction key, reference key, y limits)``.
        dsets: Data-set keys, one bar group each.
        ntwks: Network keys to show; when empty, all keys of ``self.fdata``
            in sorted order.
        fontsize: Font size of tick labels, axis labels, titles and legend.
        bbox_to_anchor: Legend anchor, passed to ``ax.legend``.
        figsize: Currently not used; the figure is always created with
            ``(30.0, 24.0)``.
        ncol: Number of legend columns.
        errortype: ``'MAE'`` or ``'RMSE'``.

    Raises:
        ValueError: If ``errortype`` is neither ``'MAE'`` nor ``'RMSE'``.
    """
    # Compare by value: `is` against a string literal only works by accident
    # of interning.
    if errortype == 'MAE':
        errfunc = hdt.calculatemeanabserror
    elif errortype == 'RMSE':
        errfunc = hdt.calculaterootmeansqrerror
    else:
        raise ValueError(
            "errortype must be 'MAE' or 'RMSE', got %r" % (errortype,))

    ind = np.arange(len(dsets))  # the x locations for the groups
    rects = []

    mpl.rcParams['xtick.labelsize'] = fontsize
    mpl.rcParams['ytick.labelsize'] = fontsize

    # NOTE(review): the `figsize` argument is ignored here; kept as-is so that
    # existing figures do not change size.
    fig, axes = plt.subplots(len(props), 1, figsize=(30.0, 24.0))

    if len(ntwks) == 0:
        keys = sorted(self.fdata.keys())
    else:
        keys = ntwks

    # plt.subplots returns a bare Axes for a single row; normalise to an array.
    for j, (p, ax) in enumerate(zip(props, np.atleast_1d(axes).flatten())):
        width = 0.85 / len(keys)  # the width of the bars
        colors = cm.viridis(np.linspace(0, 1, len(keys)))
        # NOTE(review): this replaces the colour list by the 1-character
        # string 'r', so zip() below then yields only the first network —
        # confirm intent.
        if j == len(keys) - 1:
            colors = 'r'

        for i, (k, c) in enumerate(zip(keys, colors)):
            heights = []
            spreads = []
            for tk in dsets:
                predicted = self.fdata[k][tk][p[2]]
                reference = self.fdata[k][tk][p[3]]
                # NOTE(review): row 5 is hard-coded — confirm it is the
                # intended row for every network.
                heights.append(errfunc(predicted[5, :], reference))
                spreads.append(np.std(errfunc(predicted, reference, axis=1)))

            rects.append(
                ax.bar(ind + i * width, heights, width, color=c, bottom=0.0))
            ax.errorbar(ind + i * width + width / 2.0,
                        heights,
                        spreads,
                        fmt='.',
                        capsize=8,
                        elinewidth=3,
                        color='red',
                        ecolor='red',
                        markeredgewidth=2)

        ax.set_ylim(p[4])

        # add some text for labels, title and axes ticks
        ax.set_ylabel(p[1], fontsize=fontsize)
        ax.set_title(p[0], fontsize=fontsize)
        ax.set_xticks(ind + ((len(keys) + 3) * width) / len(props))
        ax.set_xticklabels(list(dsets))
        if j == 0:
            ax.legend(rects,
                      keys,
                      fontsize=fontsize,
                      bbox_to_anchor=bbox_to_anchor,
                      ncol=ncol)

    plt.show()
def plot_error_by_net(self,
                      props,
                      dsets,
                      ntwks=(),
                      fontsize=14,
                      bbox_to_anchor=(1.0, 1.1),
                      figsize=(15.0, 12.0),
                      ncol=1,
                      errortype='MAE',
                      storepath=''):
    """Plot the error of every property across networks, one axes per data set.

    A 2 x 3 grid of axes is created and filled in order with the data sets.
    For each property the errors of all networks except the last are drawn
    as a connected line with error bars; the error of the last network is
    drawn as a dashed horizontal line. The error is taken on the last row of
    the prediction array, the error bar is the standard deviation of the
    row-wise errors. The global tick label sizes in ``mpl.rcParams`` are
    changed as a side effect.

    Args:
        props: Sequence of property descriptors; element 0 is the legend
            label, element 2 the prediction key, element 3 the reference key.
        dsets: Data-set keys, one axes each (at most six are shown).
        ntwks: Network keys in plotting order; when empty, all keys of
            ``self.fdata`` in sorted order.
        fontsize: Base font size.
        bbox_to_anchor: Legend anchor, passed to ``ax.legend``.
        figsize: Figure size passed to ``plt.subplots``.
        ncol: Number of legend columns.
        errortype: ``'MAE'`` or ``'RMSE'``; also used as the y label.
        storepath: When non-empty, the figure is written to this PDF instead
            of being shown.

    Raises:
        ValueError: If ``errortype`` is neither ``'MAE'`` nor ``'RMSE'``.
    """
    # Compare by value: `is` against a string literal only works by accident
    # of interning.
    if errortype == 'MAE':
        errfunc = hdt.calculatemeanabserror
    elif errortype == 'RMSE':
        errfunc = hdt.calculaterootmeansqrerror
    else:
        raise ValueError(
            "errortype must be 'MAE' or 'RMSE', got %r" % (errortype,))

    mpl.rcParams['xtick.labelsize'] = fontsize
    mpl.rcParams['ytick.labelsize'] = fontsize

    colors = cm.viridis(np.linspace(0, 1, len(props)))

    fig, axes = plt.subplots(2, 3, figsize=figsize)

    if len(ntwks) == 0:
        keys = sorted(self.fdata.keys())
    else:
        keys = ntwks

    for ds, ax in zip(dsets, axes.flatten()):
        for tk, c in zip(props, colors):
            heights = []
            spreads = []
            for k in keys:
                predicted = self.fdata[k][ds][tk[2]]
                reference = self.fdata[k][ds][tk[3]]
                last_row = predicted.shape[0] - 1
                heights.append(errfunc(predicted[last_row, :], reference))
                spreads.append(np.std(errfunc(predicted, reference, axis=1)))

            # All networks but the last form the curve ...
            curve = heights[:-1]
            x_axis = np.arange(len(curve))
            ax.plot(x_axis,
                    curve,
                    '-o',
                    color=c,
                    linewidth=5,
                    label=tk[0])
            ax.errorbar(x_axis,
                        curve,
                        yerr=spreads[:-1],
                        fmt='.',
                        capsize=8,
                        elinewidth=3,
                        color=c,
                        ecolor=c,
                        markeredgewidth=2)
            # ... and the last network is the dashed horizontal line.
            ax.plot([-0.1, len(curve) - 1 + 0.1],
                    [heights[-1], heights[-1]],
                    '--',
                    color=c,
                    linewidth=5)

        ax.legend(fontsize=fontsize,
                  bbox_to_anchor=bbox_to_anchor,
                  ncol=ncol)
        ax.set_title(ds, fontsize=fontsize + 2)
        ax.set_xticks(x_axis)
        ax.set_xticklabels(list(keys[:-1]))
        ax.set_ylabel(errortype, fontsize=fontsize)
        ax.set_xlabel('Active Learning Version', fontsize=fontsize)

    if storepath:
        # Context manager closes the PDF even if savefig raises.
        with PdfPages(storepath) as pp:
            pp.savefig(fig)
    else:
        plt.show()
def plot_irc(axes, i, d, f):
    """Evaluate two network ensembles on one profile file and plot the result.

    The file ``d + f`` is read with ``pyg.read_irc``; the first energy and
    geometry and the last coordinate row are dropped, and the geometries are
    written to an xyz file. When the coordinate array has more than 10
    entries, both ensembles (module-level ``nc1`` and ``nc2``) are evaluated,
    error statistics are appended to the module-level lists ``bar1``,
    ``bar2``, ``rmp1``, ``rmp2``, ``Ec1``, ``Ec2`` and ``Ea``, a summary line
    is printed and the curves are drawn on ``axes``.

    Args:
        axes: Matplotlib axes to draw on.
        i: Index of the file; used in the printed line and in the legend.
        d: Directory prefix of the file.
        f: File name; must contain ``'IRC'``.
    """
    Eact, xyz, spc, Rc = pyg.read_irc(d + f)
    # Drop the first energy/geometry and the last coordinate row.
    Eact = (hdt.hatokcal * Eact)[1:]
    xyz = xyz[1:]
    Rc = Rc[:-1]

    # Not used further, but raises IndexError when 'IRC' is not part of f.
    s_idx = f.split('IRC')[1].split('.')[0]

    hdt.writexyzfile(c + f.split('.')[0] + '.xyz', xyz, spc)

    # NOTE(review): source indentation was lost; everything below is assumed
    # to belong to the size check.
    if Rc.size <= 10:
        return

    def evaluate(models):
        # Energies of every ensemble member, scaled by hdt.hatokcal.
        member_energies = []
        for model in models:
            model.setConformers(confs=xyz, types=list(spc))
            member_energies.append(hdt.hatokcal * model.energy())
        return member_energies

    members1 = evaluate(nc1)
    members2 = evaluate(nc2)

    # Spread across members (reversed for plotting) and ensemble mean.
    modl_std1 = np.std(members1, axis=0)[::-1]
    energies1 = np.mean(np.vstack(members1), axis=0)
    modl_std2 = np.std(members2, axis=0)[::-1]
    energies2 = np.mean(np.vstack(members2), axis=0)

    rmse1 = hdt.calculaterootmeansqrerror(energies1, Eact)
    rmse2 = hdt.calculaterootmeansqrerror(energies2, Eact)

    # Maximum minus first point, and first minus last point.
    dba = Eact.max() - Eact[0]
    db1 = energies1.max() - energies1[0]
    db2 = energies2.max() - energies2[0]

    rpa = Eact[0] - Eact[-1]
    rp1 = energies1[0] - energies1[-1]
    rp2 = energies2[0] - energies2[-1]

    bar1.append(abs(db1 - dba))
    bar2.append(abs(db2 - dba))
    rmp1.append(abs(rpa - rp1))
    rmp2.append(abs(rpa - rp2))

    Ec1.append(energies1)
    Ec2.append(energies2)
    Ea.append(Eact)

    print(i, ')', f, ':', len(spc), ':', rmse1, rmse2, 'R/P1: ',
          abs(rpa - rp1), 'R/P2: ', abs(rpa - rp2), 'Barrier1:',
          abs(db1 - dba), 'Barrier2:', abs(db2 - dba))

    # Curves relative to their first plotted point; the network curves are
    # reversed before plotting.
    rc_coord = Rc[:, 1]
    reference_curve = hdt.hatokcal * (Rc[:, 0] - Rc[:, 0][0])
    curve2 = energies2[::-1] - energies2[::-1][0]
    curve1 = energies1[::-1] - energies1[::-1][0]

    axes.set_xlim([Rc.min(), Rc.max()])
    axes.set_ylim([reference_curve.min() - 1.0, curve2.max() + 20.0])

    axes.plot(rc_coord, reference_curve, color='Black', label='DFT')
    axes.errorbar(rc_coord,
                  curve2,
                  yerr=modl_std2,
                  fmt='--',
                  color='red',
                  label="ANI-1: " + "{:.1f}".format(bar2[-1]),
                  linewidth=2)
    axes.errorbar(rc_coord,
                  curve1,
                  yerr=modl_std1,
                  fmt='--',
                  color='blue',
                  label="[" + str(i) + "]: " + "{:.1f}".format(bar1[-1]),
                  linewidth=2)

    axes.legend(loc="upper left", fontsize=10)
    axes.set_title(str(f),
                   color='black',
                   fontdict={'weight': 'bold'},
                   x=0.8,
                   y=0.85)
def plot_corr_dist_axes(ax,
                        Xp,
                        Xa,
                        cmap,
                        labelx,
                        labely,
                        plabel,
                        vmin=0,
                        vmax=0):
    """Draw a log-scaled 2D correlation histogram with a zoomed inset on ``ax``.

    The plotted range is the range of ``Xa`` on both axes. The axes receive
    the diagonal, the histogram of ``(Xp, Xa)``, the text ``plabel``, a box
    with the MAE and RMSE from ``hdt``, and an inset around the means of the
    data.

    Args:
        ax: Matplotlib axes to draw on.
        Xp: Values on the histogram x axis.
        Xa: Values on the histogram y axis; define the plotted range.
        cmap: Colormap of both histograms.
        labelx: Label of the x axis.
        labely: Label of the y axis.
        plabel: Free text placed near the lower-left of the plot.
        vmin: Lower colour limit; ignored when ``vmin`` and ``vmax`` are
            both 0.
        vmax: Upper colour limit; ignored when ``vmin`` and ``vmax`` are
            both 0.

    Returns:
        The tuple returned by ``ax.hist2d`` for the main histogram.
    """
    Fmx = Xa.max()
    Fmn = Xa.min()
    span = Fmx - Fmn

    # Plot ground truth line
    ax.plot([Fmn, Fmx], [Fmn, Fmx], '--', c='red', linewidth=3)

    # Set labels
    ax.set_xlabel(labelx, fontsize=26)
    ax.set_ylabel(labely, fontsize=26)

    # Colour limits go into the norm itself: current Matplotlib rejects a
    # Normalize instance passed together with vmin/vmax.
    if vmin == 0 and vmax == 0:
        norm = LogNorm()
    else:
        norm = LogNorm(vmin=vmin, vmax=vmax)

    # Plot 2d Histogram
    bins = ax.hist2d(Xp,
                     Xa,
                     bins=200,
                     norm=norm,
                     range=[[Fmn, Fmx], [Fmn, Fmx]],
                     cmap=cmap)

    # Annotate with label
    ax.text(0.25 * span + Fmn, 0.06 * span + Fmn, plabel, fontsize=26)

    # Annotate with errors
    PMAE = hdt.calculatemeanabserror(Xa, Xp)
    PRMS = hdt.calculaterootmeansqrerror(Xa, Xp)
    ax.text(0.6 * span + Fmn,
            0.2 * span + Fmn,
            'MAE=' + "{:.3f}".format(PMAE) + '\nRMSE=' + "{:.3f}".format(PRMS),
            fontsize=30,
            bbox={
                'facecolor': 'white',
                'alpha': 0.5,
                'pad': 5
            })

    axins = zoomed_inset_axes(ax, 2., loc=2)  # magnification 2
    sz = 0.1 * span  # half-width of the inset window

    # NOTE(review): the x window is centred on Xa.mean() although Xp is the
    # x data (and vice versa for y) — confirm.
    x1, x2 = Xa.mean() - sz, Xa.mean() + sz
    y1, y2 = Xp.mean() - sz, Xp.mean() + sz

    axins.hist2d(Xp,
                 Xa,
                 bins=50,
                 range=[[x1, x2], [y1, y2]],
                 norm=LogNorm(),
                 cmap=cmap)
    axins.plot([y1, y2], [y1, y2], '--', c='r', linewidth=3)

    # sub region of the original image
    axins.set_xlim(x1, x2)
    axins.set_ylim(y1, y2)
    axins.yaxis.tick_right()

    plt.xticks(visible=True)
    plt.yticks(visible=True)

    # NOTE(review): "1.5" lies outside the 0-1 range of grayscale strings;
    # newer Matplotlib releases may reject it — confirm the intended colour.
    mark_inset(ax, axins, loc1=1, loc2=3, fc="none", ec="1.5")
    return bins
Fdft = hdn.hatokcal * Fdft #.reshape(-1) idx = np.asarray(np.where(sigma < 0.08))[0] #print(idx,Fani[0].shape,Fdft.shape) Ferr.append((Fani[0][idx] - Fdft[idx]).flatten()) # Calculate full dE dEani = hdn.calculateKdmat(Ncv, Eani) dEdft = hdn.calculatedmat(Edft) # Calculate per molecule errors FMAE = hdn.calculatemeanabserror(Fani.reshape(Ncv, -1), Fdft.reshape(-1), axis=1) FRMSE = hdn.calculaterootmeansqrerror(Fani.reshape(Ncv, -1), Fdft.reshape(-1), axis=1) #plt.hist((Fani-Fdft).flatten(),bins=100) # plt.show() ''' if Emax[0] < np.abs((Eani-Edft)).max(): ind = np.argmax(np.abs((Eani-Edft)).flatten()) Emax[0] = (Eani-Edft).flatten()[ind] Emax[1] = Eani.flatten()[ind] Emax[2] = Edft.flatten()[ind] if Fmax[0] < np.abs((Fani-Fdft)).max(): ind = np.argmax(np.abs((Fani-Fdft)).flatten()) Fmax[0] = (Fani-Fdft).flatten()[ind] Fmax[1] = Fani.flatten()[ind]
def generate_fullset_errors(self, ntkey, tslist):
    """Return pooled error statistics for energies, energy deltas and forces.

    For each of the pairs ``Eani/Edft``, ``dEani/dEdft`` and ``Fani/Fdft``
    four values are computed over the selected test sets:

    * MAE of the last prediction row against the reference,
    * standard deviation of the row-wise MAE of all rows before the last,
    * RMSE of the last prediction row against the reference,
    * standard deviation of the row-wise RMSE of all rows before the last.

    Args:
        ntkey: Key selecting the network entry in ``self.fdata``.
        tslist: Iterable of test-set keys; when empty/falsy, all test sets
            stored under ``ntkey`` are used.

    Returns:
        dict keyed by ``names[0]`` .. ``names[11]``: four consecutive entries
        per pair, in the order listed above.
    """
    net = self.fdata[ntkey]
    # Materialise the keys once: they are indexed below and iterated several
    # times, which a one-shot iterable would not survive.
    tskeys = list(tslist) if tslist else list(net.keys())
    # Index of the last row of the prediction arrays.
    Nn = net[tskeys[0]]['Eani'].shape[0] - 1

    pairs = (('Eani', 'Edft'), ('dEani', 'dEdft'), ('Fani', 'Fdft'))

    errors = {}
    for block, (pred_key, ref_key) in enumerate(pairs):
        base = 4 * block

        # Build every pooled array once; MAE and RMSE share the same inputs.
        last_row = np.concatenate([net[ts][pred_key][Nn, :] for ts in tskeys])
        reference = np.concatenate([net[ts][ref_key] for ts in tskeys])
        other_rows = np.hstack([net[ts][pred_key][0:Nn, :] for ts in tskeys])
        # Kept separate from `reference`: hstack and concatenate only coincide
        # for one-dimensional reference arrays.
        reference_stacked = np.hstack([net[ts][ref_key] for ts in tskeys])

        errors[names[base]] = hdt.calculatemeanabserror(last_row, reference)
        errors[names[base + 1]] = np.std(
            hdt.calculatemeanabserror(other_rows, reference_stacked, axis=1))
        errors[names[base + 2]] = hdt.calculaterootmeansqrerror(
            last_row, reference)
        errors[names[base + 3]] = np.std(
            hdt.calculaterootmeansqrerror(other_rows,
                                          reference_stacked,
                                          axis=1))
    return errors
# Pool the per-profile arrays; keep the per-profile list for the histograms.
EA = np.concatenate(EA)
dxl = dx
dx = np.concatenate(dx)

# Histogram of every third profile.
for i, x in enumerate(dxl):
    if i % 3 == 0:
        plt.hist(x, bins=50, histtype=u'step')
        #plt.plot(np.array(range(0,x.shape[0])),x)

#plt.ylabel('Rc $(\AA)$')
#plt.xlabel('Step')
plt.ylabel('count')
# Raw string: "\A" is not a valid escape sequence in a normal literal.
plt.xlabel(r'$(\AA)$')
plt.show()

# Scatter of both pooled energy sets over dx; the legend carries the RMSE
# between them.
errn = hdt.calculaterootmeansqrerror(E1, EA)
plt.scatter(dx, E1, color='red', label="{:.2f}".format(errn), linewidth=1)
plt.scatter(dx, EA, color='black', linewidth=1)

# Third entry of datairc, scaled by hdt.hatokcal, over the distance of atom
# `atm` from xr in every geometry of datairc[0].
plt.plot(np.array([np.linalg.norm(m[atm] - xr) for m in datairc[0]]),
         hdt.hatokcal * datairc[2],
         marker='o',
         color='blue',
         linewidth=3)

plt.suptitle("Double bond migration IRCs")
#plt.ylabel('E (kcal/mol)')
#plt.xlabel('Distance $\AA$')
plt.legend(bbox_to_anchor=(0.05, 0.95), loc=2, borderaxespad=0., fontsize=16)
plt.show()
def generate_fullset_mean_errors(self, ntkey):
    """Return RMSE summaries pooled over every test set stored for ``ntkey``.

    For each of the three field pairs (``Eani``/``Edft``, ``dEani``/``dEdft``,
    ``Fani``/``Fdft``) two entries are produced, keyed from the module-level
    ``names`` sequence at indices 2, 6 and 10:

    * ``names[i] + 'E'`` -- RMSE of the last row of the prediction array
      against the reference (presumably the ensemble-mean prediction;
      confirm against the code that fills ``fdata``).
    * ``names[i] + 'M'`` -- mean over rows ``0..Nn-1`` of the per-row RMSE.

    Args:
        ntkey: key selecting the network entry in ``self.fdata``.

    Returns:
        dict mapping the six generated key strings to scalar error values.
    """
    per_set = self.fdata[ntkey]
    set_keys = per_set.keys()
    # Index of the final row; the row count is taken from the first test set.
    last_row = per_set[list(set_keys)[0]]['Eani'].shape[0] - 1
    rmse = hdt.calculaterootmeansqrerror

    summary = {}
    for name_idx, pred_field, ref_field in ((2, 'Eani', 'Edft'),
                                            (6, 'dEani', 'dEdft'),
                                            (10, 'Fani', 'Fdft')):
        predicted = [per_set[key][pred_field] for key in set_keys]
        reference = [per_set[key][ref_field] for key in set_keys]

        # Last row of every test set, pooled into one vector.
        summary[names[name_idx] + 'E'] = rmse(
            np.concatenate([block[last_row, :] for block in predicted]),
            np.concatenate(reference))

        # Remaining rows: one RMSE per row (axis=1), then averaged.
        summary[names[name_idx] + 'M'] = np.mean(
            rmse(np.hstack([block[0:last_row, :] for block in predicted]),
                 np.hstack(reference),
                 axis=1))
    return summary
deltas = gt.hatokcal * np.abs(Ecmp_t - np.array(Eact_t, dtype=float)) Me = max(deltas) if Me > Herror: Herror = Me Wfile = '' #data['parent'] + '/' + data['child'] Le = min(deltas) if Le < Lerror: Lerror = Le Bfile = '' #data['parent'] + '/' + data['child'] #print (gt.hatokcal * gt.calculaterootmeansqrerror(np.array(Eact_t, dtype=float),Ecmp_t)) tNa = nc.getNumAtoms() err.append(gt.hatokcal * gt.calculaterootmeansqrerror( np.array(Eact_t, dtype=float), Ecmp_t) / float(tNa)) sze.append(float(len(Eact_t))) time += _t2b Ecmp += Ecmp_t Eact += Eact_t #print('FILE: ', data['child'],' Energy: ', gt.hatokcal * np.array(Eact_t).min(),' Error: ', gt.hatokcal * gt.calculaterootmeansqrerror(np.array(Eact_t),np.array(Ecmp_t))) cnt = cnt + 1 _timeloop2 = (tm.time() - _timeloop) print('Computation complete. Time: ' + "{:.4f}".format(_timeloop2) + 'ms') adl.cleanup() #plt_by_index(np.array(Eerr),-1)
def generate_total_errors(self, ntkey, tskey):
    """Return the twelve error statistics for one network / test-set pair.

    The result is keyed by ``names[0]`` .. ``names[11]`` (``names`` is a
    module-level sequence defined outside this function).  For each field
    pair, in the order ``Eani``/``Edft``, ``dEani``/``dEdft``,
    ``Fani``/``Fdft``, four consecutive entries are written:

    1. mean absolute error of the last prediction row vs. the reference,
    2. standard deviation of the per-row MAE over rows ``0..Nn-1``,
    3. RMSE of the last prediction row vs. the reference,
    4. standard deviation of the per-row RMSE over rows ``0..Nn-1``.

    The last row is presumably the ensemble-mean prediction; confirm against
    the code that fills ``fdata``.

    Args:
        ntkey: key selecting the network entry in ``self.fdata``.
        tskey: key selecting the test set within that entry.

    Returns:
        dict with twelve scalar entries.
    """
    record = self.fdata[ntkey][tskey]
    last_row = record['Eani'].shape[0] - 1
    metrics = (hdt.calculatemeanabserror, hdt.calculaterootmeansqrerror)

    stats = {}
    slot = 0
    for pred_field, ref_field in (('Eani', 'Edft'),
                                  ('dEani', 'dEdft'),
                                  ('Fani', 'Fdft')):
        for metric in metrics:
            # Error of the final row on its own.
            stats[names[slot]] = metric(record[pred_field][last_row, :],
                                        record[ref_field])
            # Spread of the per-row errors across the remaining rows.
            stats[names[slot + 1]] = np.std(
                metric(record[pred_field][0:last_row, :],
                       record[ref_field],
                       axis=1))
            slot += 2
    return stats
# Dump the geometries to an xyz file.
hdn.writexyzfile('/home/jujuman/crds.xyz', xyz, data[2][0])

# Set required files for pyNeuroChem
#wkdir = '/home/jujuman/Dropbox/ChemSciencePaper.AER/ANI-c08e-ntwk/'
wkdir = '/home/jujuman/Research/GDB-11-wB97X-6-31gd/ANI-c08e-ntwk_newtrain/'
cnstfile = wkdir + 'rHCNO-4.6A_16-3.1A_a4-8.params'
saefile = wkdir + 'sae_6-31gd.dat'
nnfdir = wkdir + 'networks/'

# Construct pyNeuroChem class
mol = pync.conformers(cnstfile, saefile, nnfdir, 0)
mol.setConformers(confs=xyz, types=list(data[2][0]))

# Network energies scaled by hdn.hatokcal, and their RMSE against df_E.
E = hdn.hatokcal * mol.energy()
rmse = hdn.calculaterootmeansqrerror(df_E, E)

# The x axis is the conformer index (0 .. N-1), not a measured distance.
x = list(range(0, df_E.shape[0]))
#x = np.linalg.norm(xyz[:,3,:]-xyz[0,3,:],axis=1)
#print(x)

plt.scatter(x, df_E, label='DFT')
plt.scatter(x, E, label='ANI err: ' + str(rmse) + ' kcal/mol')
# Raw string: '\A' is not a valid Python escape and warns on modern Python.
plt.xlabel(r'Distance ($\AA$)')
plt.ylabel('Energy (kcal/mol)')
plt.legend(bbox_to_anchor=(0.4, 0.99),
           loc=2,
           borderaxespad=0.,
           fontsize=14)
plt.show()
def determine_min_error_by_sigma(self,
                                 ntkey,
                                 minerror,
                                 percent,
                                 tskeys=('GDB07to09',),
                                 figsize=(15.0, 12.0),
                                 labelx='',
                                 labely='',
                                 xyrange=(0.0, 10.0, 0.0, 10.0),
                                 storepath='',
                                 cmap=mpl.cm.viridis):
    """Find the spread threshold that captures ``percent`` of large errors.

    Two per-sample quantities are built from the pooled test sets:

    * ``Sani`` -- standard deviation of ``Eani`` over rows ``0..Nn-1``,
      divided by ``sqrt(Na)``;
    * ``Eerr`` -- largest absolute deviation of those rows from ``Edft``,
      divided by ``sqrt(Na)``.

    A threshold ``S`` is lowered from ``Sani.max()`` in steps of 0.001 until
    the samples with ``Sani > S`` contain at least ``percent`` percent of all
    samples whose ``Eerr`` exceeds ``minerror``.  A log-scaled 2-D histogram
    of ``Sani`` against ``Eerr`` is then drawn with the threshold marked and
    a summary box; the RMSE values in the box use the last ``Eani`` row
    (presumably the ensemble mean -- confirm against the code filling
    ``fdata``).

    Args:
        ntkey: key selecting the network entry in ``self.fdata``.
        minerror: error level above which a sample counts as a large error.
        percent: share (0-100) of large-error samples the threshold must
            capture.
        tskeys: iterable of test-set keys to pool.
        figsize: figure size passed to ``plt.subplots``.
        labelx: x-axis label.
        labely: y-axis label.
        xyrange: ``(xmin, xmax, ymin, ymax)`` histogram range; ``xmax`` and
            ``ymax`` also position the summary box.
        storepath: if non-empty, the figure is written to this PDF path
            instead of being shown.
        cmap: colormap for the histogram.

    Side effects:
        Sets the global ``xtick.labelsize``/``ytick.labelsize`` rcParams to
        18, prints both RMSE values, and shows or saves the figure.
    """
    mpl.rcParams['xtick.labelsize'] = 18
    mpl.rcParams['ytick.labelsize'] = 18

    sets = [self.fdata[ntkey][tskey] for tskey in tskeys]
    # Index of the last Eani row; rows 0..Nn-1 are treated separately from it.
    Nn = sets[0]['Eani'].shape[0] - 1

    Eani = np.hstack([d['Eani'][0:Nn, :] for d in sets])
    Eanimu = np.hstack([d['Eani'][Nn, :] for d in sets])
    Edft = np.concatenate([d['Edft'] for d in sets])
    Sani = np.concatenate(
        [np.std(d['Eani'][0:Nn, :], axis=0) for d in sets])
    Na = np.concatenate([d['Na'] for d in sets])

    Sani = Sani / np.sqrt(Na)
    Eerr = np.max(np.abs(Eani - Edft), axis=0) / np.sqrt(Na)

    # Number of samples whose error exceeds the requested level.
    Nmax = np.where(Eerr > minerror)[0].size

    sigma_max = Sani.max()
    sigma_min = Sani.min()
    # Initialised up front so S is defined even when the loop body never
    # runs (percent <= 0).
    S = sigma_max
    perc = 0.0
    step = 0
    while perc < percent:
        S = sigma_max - step * 0.001
        Sidx = np.where(Sani > S)
        step += 1
        perc = 100.0 * np.where(Eerr[Sidx] > minerror)[0].size / (
            Nmax + 1.0E-7)
        # Once S is below the smallest spread every sample is selected and
        # perc cannot grow any further.  Without this exit the sweep never
        # ends when Nmax == 0 or when percent is unreachable (e.g. 100,
        # because of the 1e-7 in the denominator).  Written as "not >=" so
        # that a NaN threshold also stops the sweep.
        if not S >= sigma_min:
            break

    if perc < percent:
        print('Warning: requested capture of ' + str(percent) +
              '% not reachable; best achievable is ' +
              "{:.1f}".format(perc) + '%')

    So = np.where(Sani > S)
    Su = np.where(Sani <= S)

    rmse_over = hdt.calculaterootmeansqrerror(Eanimu[So], Edft[So])
    rmse_under = hdt.calculaterootmeansqrerror(Eanimu[Su], Edft[Su])
    print('RMSE Over: ', rmse_over)
    print('RMSE Under: ', rmse_under)

    fig, ax = plt.subplots(figsize=figsize)

    n_over = So[0].size
    # Fraction of large errors among samples above the threshold, and among
    # all samples.  Guarded so an empty selection does not divide by zero.
    poa = np.where(Eerr[So] > minerror)[0].size / n_over if n_over else 0.0
    pob = Nmax / Eerr.size

    summary = ('Total Captured: ' + str(int(100.0 * n_over / Edft.size)) +
               '%' + '\n' + r'($\mathrm{\mathcal{E}>}$' +
               "{:.1f}".format(minerror) +
               r'$\mathrm{) \forall \rho}$: ' + str(int(100 * pob)) + '%' +
               '\n' + r'($\mathrm{\mathcal{E}>}$' +
               "{:.1f}".format(minerror) +
               r'$\mathrm{) \forall \rho >}$' + "{:.2f}".format(S) + ': ' +
               str(int(100 * poa)) + '%' + '\n' +
               r'$\mathrm{E}$ RMSE ($\mathrm{\rho>}$' +
               "{:.2f}".format(S) + r'$\mathrm{)}$: ' +
               "{:.1f}".format(rmse_over) + '\n' +
               r'$\mathrm{E}$ RMSE ($\mathrm{\rho\leq}$' +
               "{:.2f}".format(S) + r'$\mathrm{)}$: ' +
               "{:.1f}".format(rmse_under))

    ax.text(0.57 * (xyrange[1]),
            0.04 * (xyrange[3]),
            summary,
            bbox={
                'facecolor': 'grey',
                'alpha': 0.5,
                'pad': 10
            },
            fontsize=18)

    # Vertical marker at the selected threshold.
    ax.axvline(x=S,
               linestyle='--',
               color='r',
               linewidth=5,
               label=r"$\mathrm{\rho=}$" + "{:.2f}".format(S) +
               ' is the value that captures\n' + str(int(percent)) +
               '% of errors over ' + r"$\mathrm{\mathcal{E}=}$" +
               "{:.1f}".format(minerror))

    # Set labels
    ax.set_xlabel(labelx, fontsize=24)
    ax.set_ylabel(labely, fontsize=24)

    # Plot 2d Histogram
    bins = ax.hist2d(Sani,
                     Eerr,
                     bins=400,
                     norm=LogNorm(),
                     range=[[xyrange[0], xyrange[1]],
                            [xyrange[2], xyrange[3]]],
                     cmap=cmap)

    # Build color bar
    # cbaxes = fig.add_axes([0.91, 0.1, 0.03, 0.8])
    cb1 = fig.colorbar(bins[-1], cmap=cmap)
    cb1.set_label('Count', fontsize=20)
    cb1.ax.tick_params(labelsize=18)

    ax.legend(loc='upper center', fontsize=18)

    if storepath:
        # Context manager closes the PDF even if savefig raises.
        with PdfPages(storepath) as pp:
            pp.savefig(fig)
    else:
        plt.show()
2.0, 1.9, 2.0, ]) popt, pcov = curve_fit(hdt.buckingham_pot, xt_data, yt_data, p0=p0, bounds=bounds) # NN print(popt) iEc = hdt.buckingham_pot(xv_data, *p0) fEc = hdt.buckingham_pot(xv_data, *popt) irmse = hdt.calculaterootmeansqrerror(iEc, yv_data) frmse = hdt.calculaterootmeansqrerror(fEc, yv_data) np.savez('mp_ani_params_test.npz', param=popt) print('Final RMSE:', hdt.hatokcal * frmse, ' Initial RMSE:', hdt.hatokcal * irmse) plt.plot(yv_data, yv_data, color='black', label='Act') plt.scatter(yv_data, iEc, color='red', label='Init') plt.scatter(yv_data, fEc, color='blue', label='Fit') plt.xlabel('x') plt.ylabel('y') plt.legend() plt.show()
def plot_irc_data(axes, file, rcf, title):
    """Plot network and reference energies along a reaction coordinate.

    Geometries, atom types and reference energies are read from ``file``
    with ``hdt.readncdat``.  One network loaded from ``rcdir`` (red curve)
    and five networks loaded from ``wkdir`` (blue curves) are evaluated on
    all geometries; every curve is shifted so its first point is zero and
    scaled by ``hdt.hatokcal``.  The reference curve is drawn dashed black.
    Legend entries show the RMSE of the red curve and the mean RMSE of the
    five blue curves.

    Args:
        axes: matplotlib axes to draw on.
        file: path handed to ``hdt.readncdat``.
        rcf: path handed to ``np.load``; column 1 of its ``'x'`` entry is
            used as the x axis.
        title: axes title.

    Returns:
        ``np.array([errn, mean_err])`` -- RMSE of the single network and the
        mean RMSE over the five networks.

    Side effects:
        Prints the first-vs-last-geometry energy difference for both the
        network and the reference.  Network files are read from hard-coded
        paths.
    """
    xyz, typ, Eact = hdt.readncdat(file, np.float32)
    # Read the x axis once instead of re-indexing the loaded file per plot.
    rc_x = np.load(rcf)['x'][:, 1]

    # Set required files for pyNeuroChem
    wkdir = '/home/jujuman/Dropbox/ChemSciencePaper.AER/networks/ANI-c08f-ntwk-cv/'
    cnstfile = 'rHCNO-4.6A_16-3.1A_a4-8.params'
    saefile = 'sae_6-31gd.dat'

    nc = [
        pync.conformers(wkdir + cnstfile, wkdir + saefile,
                        wkdir + 'cv_c08e_ntw_' + str(idx) + '/networks/', 0)
        for idx in range(5)
    ]

    rcdir = '/home/jujuman/Research/ANI-DATASET/RXN1_TNET/training/rxn1to6/ani_benz_rxn_ntwk/'
    ncr1 = pync.conformers(rcdir + '../../' + cnstfile,
                           rcdir + '../../' + saefile, rcdir + '/networks/',
                           0)
    ncr2 = pync.molecule(rcdir + '../../' + cnstfile,
                         rcdir + '../../' + saefile, rcdir + '/networks/', 0)
    ncr3 = pync.molecule(rcdir + '../../' + cnstfile,
                         rcdir + '../../' + saefile, rcdir + '/networks/', 0)

    # Compute reactant E (first geometry)
    ncr2.setMolecule(coords=xyz[0], types=list(typ))
    Er = ncr2.energy()

    # Compute product E (last geometry)
    ncr3.setMolecule(coords=xyz[-1], types=list(typ))
    Ep = ncr3.energy()

    dE_ani = hdt.hatokcal * (Er - Ep)
    dE_dft = hdt.hatokcal * (Eact[0] - Eact[-1])
    # The second label previously repeated "ANI" for the reference value.
    print('Delta E R/P ANI:', dE_ani, 'Delta E R/P DFT:', dE_dft, 'Diff:',
          abs(dE_ani - dE_dft))

    # Set the conformers in NeuroChem
    ncr1.setConformers(confs=xyz, types=list(typ))

    # Compute Energies of Conformations
    E1 = ncr1.energy()

    # Shift both curves so the first point is the zero of energy.
    E1 = E1 - E1[0]
    Eact = Eact - Eact[0]

    # Plot
    errn = hdt.calculaterootmeansqrerror(hdt.hatokcal * E1,
                                         hdt.hatokcal * Eact)

    axes.plot(rc_x,
              hdt.hatokcal * E1,
              color='red',
              label="{:.2f}".format(errn),
              linewidth=2)
    # Line style only in the format string: a colour there ('r--') clashes
    # with the explicit color keyword and makes matplotlib warn.
    axes.plot(rc_x, hdt.hatokcal * Eact, '--', color='black', linewidth=3)

    err = []
    last = len(nc) - 1
    for n, net in enumerate(nc):
        # Set the conformers in NeuroChem
        net.setConformers(confs=xyz, types=list(typ))

        # Compute Energies of Conformations
        E1 = net.energy()
        E1 = E1 - E1[0]

        err.append(
            hdt.calculaterootmeansqrerror(hdt.hatokcal * E1,
                                          hdt.hatokcal * Eact))

        # Only the final curve carries a legend entry: the mean RMSE of all
        # networks, which is complete once the last error is appended.
        if n == last:
            mean = np.mean(np.asarray(err))
            axes.plot(rc_x,
                      hdt.hatokcal * E1,
                      color='blue',
                      label="{:.2f}".format(mean),
                      linewidth=1)
        else:
            axes.plot(rc_x, hdt.hatokcal * E1, color='blue', linewidth=1)

    axes.set_xlim([rc_x.min(), rc_x.max()])
    axes.legend(loc="upper right", fontsize=8)
    axes.set_title(title)
    return np.array([errn, np.mean(err)])