def generate_fullset_peratom_errors(self, ntkey, tslist):
    """Return per-atom energy error metrics pooled over several test sets.

    Args:
        ntkey: Key into ``self.fdata`` selecting one network's results.
        tslist: Iterable of test-set keys to pool. When empty or ``None``,
            every test set stored under ``self.fdata[ntkey]`` is used.

    Returns:
        dict: Four entries keyed by the module-level ``names[0]``,
        ``names[2]``, ``names[4]`` and ``names[6]``: mean-absolute and
        root-mean-square error of ``'Eani'`` vs ``'Edft'`` (both divided by
        ``'Na'``), then the same two metrics for ``'dEani'`` vs ``'dEdft'``
        (both divided by ``'Na2'``). Every value is multiplied by 1000.

    Raises:
        IndexError: If no test set is selected.
    """
    net_data = self.fdata[ntkey]
    # Materialise the selection once so it can be indexed and iterated
    # repeatedly, even when the caller hands in a one-shot iterable.
    tskeys = list(tslist) if tslist else list(net_data.keys())

    # Index of the last row of 'Eani'; it is read from the first selected
    # test set and reused for all of them.
    # NOTE(review): presumably the last row holds the ensemble prediction
    # and 'Na'/'Na2' hold atom counts -- confirm against the code that
    # fills self.fdata.
    Nn = net_data[tskeys[0]]['Eani'].shape[0] - 1

    def pooled(value_key, count_key, row=None):
        """Concatenate ``value_key / count_key`` over all selected sets."""
        pieces = []
        for tskey in tskeys:
            values = net_data[tskey][value_key]
            if row is not None:
                values = values[row, :]
            pieces.append(values / net_data[tskey][count_key])
        return np.concatenate(pieces)

    # Each pooled array feeds both the MAE and the RMSE, so build it once.
    e_pred = pooled('Eani', 'Na', row=Nn)
    e_ref = pooled('Edft', 'Na')
    de_pred = pooled('dEani', 'Na2', row=Nn)
    de_ref = pooled('dEdft', 'Na2')

    return {
        names[0]: 1000 * hdt.calculatemeanabserror(e_pred, e_ref),
        names[2]: 1000 * hdt.calculaterootmeansqrerror(e_pred, e_ref),
        names[4]: 1000 * hdt.calculatemeanabserror(de_pred, de_ref),
        names[6]: 1000 * hdt.calculaterootmeansqrerror(de_pred, de_ref),
    }
def plot_corr_dist(Xa, Xp, inset=True, figsize=(13, 10)):
    """Show a log-scaled 2D correlation histogram of ``Xa`` against ``Xp``.

    The figure contains the 2D histogram with a dashed identity line, a
    colour bar, a text box with the MAE and RMSE between the two arrays, an
    optional zoomed inset, and a small histogram of ``Xa - Xp``. The figure
    is displayed with ``plt.show()``; nothing is returned.

    Args:
        Xa: Array plotted on the x axis (labelled ``Q_dft``). Its minimum
            and maximum define the plotted range of both axes.
        Xp: Array plotted on the y axis (labelled ``Q_ani``).
        inset: When true, add a zoomed inset covering one sixth of the
            range on each axis.
        figsize: Figure size forwarded to ``plt.subplots``.

    Side effects:
        Overwrites the global ``xtick.labelsize`` and ``ytick.labelsize``
        entries of ``mpl.rcParams``.
    """
    Fmx = Xa.max()
    Fmn = Xa.min()

    label_size = 14
    mpl.rcParams['xtick.labelsize'] = label_size
    mpl.rcParams['ytick.labelsize'] = label_size

    fig, ax = plt.subplots(figsize=figsize)

    # Plot ground truth line
    ax.plot([Fmn, Fmx], [Fmn, Fmx], '--', c='r', linewidth=3)

    # Axis labels are hard-coded for this quantity.
    ax.set_xlabel('$Q_{dft}$' + r' $(e \times {10}^{-3})$', fontsize=22)
    ax.set_ylabel('$Q_{ani}$' + r' $(e \times {10}^{-3})$', fontsize=22)

    cmap = mpl.cm.viridis

    # Plot 2d Histogram
    bins = ax.hist2d(Xa,
                     Xp,
                     bins=200,
                     norm=LogNorm(),
                     range=[[Fmn, Fmx], [Fmn, Fmx]],
                     cmap=cmap)

    # Build color bar from the image returned as the last hist2d element.
    cb1 = fig.colorbar(bins[-1], cmap=cmap)
    cb1.set_label('Count', fontsize=16)

    # Annotate with errors
    PMAE = hdn.calculatemeanabserror(Xa, Xp)
    PRMS = hdn.calculaterootmeansqrerror(Xa, Xp)
    ax.text(0.75 * ((Fmx - Fmn)) + Fmn,
            0.43 * ((Fmx - Fmn)) + Fmn,
            'MAE=' + "{:.1f}".format(PMAE) + '\nRMSE=' + "{:.1f}".format(PRMS),
            fontsize=20,
            bbox={
                'facecolor': 'white',
                'alpha': 0.5,
                'pad': 5
            })

    if inset:
        axins = zoomed_inset_axes(ax, 2.2, loc=2)

        # The inset shows the [Fmn / sz, Fmx / sz] window on both axes.
        sz = 6
        x1, x2, y1, y2 = Fmn / sz, Fmx / sz, Fmn / sz, Fmx / sz
        axins.hist2d(Xa,
                     Xp,
                     bins=50,
                     range=[[x1, x2], [y1, y2]],
                     norm=LogNorm(),
                     cmap=cmap)
        axins.plot([Xa.min(), Xa.max()], [Xa.min(), Xa.max()],
                   '--',
                   c='r',
                   linewidth=3)

        # sub region of the original image
        axins.set_xlim(x1, x2)
        axins.set_ylim(y1, y2)
        axins.yaxis.tick_right()

        plt.xticks(visible=True)
        plt.yticks(visible=True)

        mark_inset(ax, axins, loc1=1, loc2=3, fc="none", ec="0.5")

    # Distribution of the differences, clipped to four standard deviations
    # around the mean.
    Ferr = Xa - Xp
    std = np.std(Ferr)
    men = np.mean(Ferr)
    axh = plt.axes([.49, .14, .235, .235])
    # `density=True` replaces the `normed=True` keyword, which current
    # matplotlib releases no longer accept.
    axh.hist(Ferr,
             bins=75,
             range=[men - 4 * std, men + 4 * std],
             density=True)
    axh.set_title('Difference distribution')

    plt.show()
#print(Edft-Eani) #Fani = hdn.hatokcal * Fani#.reshape(Ncv, -1) Fdft = hdn.hatokcal * Fdft #.reshape(-1) idx = np.asarray(np.where(sigma < 0.08))[0] #print(idx,Fani[0].shape,Fdft.shape) Ferr.append((Fani[0][idx] - Fdft[idx]).flatten()) # Calculate full dE dEani = hdn.calculateKdmat(Ncv, Eani) dEdft = hdn.calculatedmat(Edft) # Calculate per molecule errors FMAE = hdn.calculatemeanabserror(Fani.reshape(Ncv, -1), Fdft.reshape(-1), axis=1) FRMSE = hdn.calculaterootmeansqrerror(Fani.reshape(Ncv, -1), Fdft.reshape(-1), axis=1) #plt.hist((Fani-Fdft).flatten(),bins=100) # plt.show() ''' if Emax[0] < np.abs((Eani-Edft)).max(): ind = np.argmax(np.abs((Eani-Edft)).flatten()) Emax[0] = (Eani-Edft).flatten()[ind] Emax[1] = Eani.flatten()[ind] Emax[2] = Edft.flatten()[ind] if Fmax[0] < np.abs((Fani-Fdft)).max():
def plot_bar_propsbynet(self,
                        props,
                        dsets,
                        ntwks=None,
                        fontsize=14,
                        bbox_to_anchor=(1.0, 1.1),
                        figsize=(15.0, 12.0),
                        ncol=1,
                        errortype='MAE'):
    """Show grouped bar charts of an error metric, one subplot per property.

    Each subplot has one group of bars per data set in ``dsets`` and one
    bar per network inside each group, with red error bars on top. The
    figure is displayed with ``plt.show()``; nothing is returned.

    Args:
        props: Sequence of property descriptors. Each is indexed as
            ``p[0]`` (subplot title), ``p[1]`` (y label), ``p[2]`` and
            ``p[3]`` (keys of the predicted and reference arrays in
            ``self.fdata[net][dset]``) and ``p[4]`` (y limits).
        dsets: Data-set keys; one bar group each.
        ntwks: Network keys to plot. When ``None`` or empty, all keys of
            ``self.fdata`` are used in sorted order.
        fontsize: Font size for tick labels, axis labels, titles, legend.
        bbox_to_anchor: Legend anchor forwarded to ``ax.legend``.
        figsize: Accepted for interface compatibility; see the note at the
            ``plt.subplots`` call.
        ncol: Number of legend columns.
        errortype: ``'MAE'`` or ``'RMSE'``.

    Raises:
        ValueError: If ``errortype`` is neither ``'MAE'`` nor ``'RMSE'``.

    Side effects:
        Overwrites the global ``xtick.labelsize`` and ``ytick.labelsize``
        entries of ``mpl.rcParams``.
    """
    # Compare by value: the previous `errortype is 'MAE'` identity test only
    # worked when the caller's string happened to be the same object.
    error_functions = {
        'MAE': hdt.calculatemeanabserror,
        'RMSE': hdt.calculaterootmeansqrerror,
    }
    if errortype not in error_functions:
        raise ValueError(
            "errortype must be 'MAE' or 'RMSE', got {!r}".format(errortype))
    metric = error_functions[errortype]

    ind = np.arange(len(dsets))  # the x locations for the groups
    rects = []

    mpl.rcParams['xtick.labelsize'] = fontsize
    mpl.rcParams['ytick.labelsize'] = fontsize

    # NOTE(review): the figure size is hard-coded and the `figsize` argument
    # is not used; kept as-is so existing output does not change.
    # squeeze=False keeps `axes` an array even for a single property, so
    # `.flatten()` below always works.
    fig, axes = plt.subplots(len(props),
                             1,
                             figsize=(30.0, 24.0),
                             squeeze=False)

    if ntwks is None or len(ntwks) == 0:
        keys = sorted(self.fdata.keys())
    else:
        keys = ntwks

    for j, (p, ax) in enumerate(zip(props, axes.flatten())):
        width = 0.85 / len(keys)  # the width of the bars
        colors = cm.viridis(np.linspace(0, 1, len(keys)))
        # NOTE(review): a one-character string zipped with `keys` below
        # yields a single (key, colour) pair, so only the first network is
        # drawn on this subplot -- confirm this is intended.
        if j == len(keys) - 1:
            colors = 'r'

        for i, (k, c) in enumerate(zip(keys, colors)):
            heights = []
            spreads = []
            for tk in dsets:
                predicted = self.fdata[k][tk][p[2]]
                reference = self.fdata[k][tk][p[3]]
                # NOTE(review): row 5 is hard-coded; presumably the
                # ensemble row of a 5-member ensemble -- confirm.
                heights.append(metric(predicted[5, :], reference))
                # Spread of the per-row metric over all rows.
                spreads.append(np.std(metric(predicted, reference, axis=1)))

            rects.append(
                ax.bar(ind + i * width, heights, width, color=c, bottom=0.0))
            ax.errorbar(ind + i * width + width / 2.0,
                        heights,
                        spreads,
                        fmt='.',
                        capsize=8,
                        elinewidth=3,
                        color='red',
                        ecolor='red',
                        markeredgewidth=2)

        ax.set_ylim(p[4])

        # add some text for labels, title and axes ticks
        ax.set_ylabel(p[1], fontsize=fontsize)
        ax.set_title(p[0], fontsize=fontsize)
        ax.set_xticks(ind + ((len(keys) + 3) * width) / len(props))
        ax.set_xticklabels([d for d in dsets])
        # Only the first subplot carries the legend.
        if j == 0:
            ax.legend(rects,
                      keys,
                      fontsize=fontsize,
                      bbox_to_anchor=bbox_to_anchor,
                      ncol=ncol)

    plt.show()
def generate_total_errors(self, ntkey, tskey):
    """Return error metrics for one network on one test set.

    For each of the pairs ``('Eani', 'Edft')``, ``('dEani', 'dEdft')`` and
    ``('Fani', 'Fdft')`` four values are produced, in this order:

    1. mean-absolute error of the last prediction row vs the reference,
    2. standard deviation of the per-row mean-absolute error over the rows
       before the last one,
    3. root-mean-square error of the last prediction row vs the reference,
    4. standard deviation of the per-row root-mean-square error over the
       rows before the last one.

    Args:
        ntkey: Key into ``self.fdata`` selecting the network.
        tskey: Key into ``self.fdata[ntkey]`` selecting the test set.

    Returns:
        dict: Twelve entries keyed by the module-level ``names[0]`` through
        ``names[11]``.
    """
    dataset = self.fdata[ntkey][tskey]
    # The last-row index comes from 'Eani' and is reused for every quantity.
    Nn = dataset['Eani'].shape[0] - 1

    quantity_pairs = (('Eani', 'Edft'), ('dEani', 'dEdft'), ('Fani', 'Fdft'))

    errors = {}
    for group, (pred_key, ref_key) in enumerate(quantity_pairs):
        first = 4 * group  # each quantity owns four consecutive names
        predicted = dataset[pred_key]
        reference = dataset[ref_key]

        errors[names[first]] = hdt.calculatemeanabserror(
            predicted[Nn, :], reference)
        errors[names[first + 1]] = np.std(
            hdt.calculatemeanabserror(predicted[0:Nn, :], reference, axis=1))
        errors[names[first + 2]] = hdt.calculaterootmeansqrerror(
            predicted[Nn, :], reference)
        errors[names[first + 3]] = np.std(
            hdt.calculaterootmeansqrerror(predicted[0:Nn, :],
                                          reference,
                                          axis=1))
    return errors
def generate_fullset_errors(self, ntkey, tslist):
    """Return error metrics for one network pooled over several test sets.

    For each of the pairs ``('Eani', 'Edft')``, ``('dEani', 'dEdft')`` and
    ``('Fani', 'Fdft')`` the per-set arrays are joined and four values are
    produced, in this order:

    1. mean-absolute error of the last prediction row vs the reference,
    2. standard deviation of the per-row mean-absolute error over the rows
       before the last one,
    3. root-mean-square error of the last prediction row vs the reference,
    4. standard deviation of the per-row root-mean-square error over the
       rows before the last one.

    Args:
        ntkey: Key into ``self.fdata`` selecting the network.
        tslist: Iterable of test-set keys to pool. When empty or ``None``,
            every test set stored under ``self.fdata[ntkey]`` is used.

    Returns:
        dict: Twelve entries keyed by the module-level ``names[0]`` through
        ``names[11]``.

    Raises:
        IndexError: If no test set is selected.
    """
    net_data = self.fdata[ntkey]
    # Materialise the selection once so it can be indexed and iterated
    # repeatedly, even when the caller hands in a one-shot iterable.
    tskeys = list(tslist) if tslist else list(net_data.keys())

    # Last-row index, read from 'Eani' of the first selected set and reused
    # for every set and every quantity.
    Nn = net_data[tskeys[0]]['Eani'].shape[0] - 1

    quantity_pairs = (('Eani', 'Edft'), ('dEani', 'dEdft'), ('Fani', 'Fdft'))

    errors = {}
    for group, (pred_key, ref_key) in enumerate(quantity_pairs):
        # Build each pooled array once; MAE and RMSE share them.
        last_row = np.concatenate(
            [net_data[tskey][pred_key][Nn, :] for tskey in tskeys])
        reference = np.concatenate(
            [net_data[tskey][ref_key] for tskey in tskeys])
        # The leading rows are joined side by side so that each row still
        # holds one prediction set across all pooled data.
        leading_rows = np.hstack(
            [net_data[tskey][pred_key][0:Nn, :] for tskey in tskeys])
        reference_stacked = np.hstack(
            [net_data[tskey][ref_key] for tskey in tskeys])

        first = 4 * group  # each quantity owns four consecutive names
        errors[names[first]] = hdt.calculatemeanabserror(last_row, reference)
        errors[names[first + 1]] = np.std(
            hdt.calculatemeanabserror(leading_rows,
                                      reference_stacked,
                                      axis=1))
        errors[names[first + 2]] = hdt.calculaterootmeansqrerror(
            last_row, reference)
        errors[names[first + 3]] = np.std(
            hdt.calculaterootmeansqrerror(leading_rows,
                                          reference_stacked,
                                          axis=1))
    return errors
def plot_corr_dist_axes(ax,
                        Xp,
                        Xa,
                        cmap,
                        labelx,
                        labely,
                        plabel,
                        vmin=0,
                        vmax=0):
    """Draw a log-scaled 2D correlation histogram with inset on ``ax``.

    ``Xp`` is plotted on the x axis and ``Xa`` on the y axis. The axes also
    receive a dashed identity line, a panel label, a text box with the MAE
    and RMSE between the two arrays, and a zoomed inset centred on the
    means of the data.

    Args:
        ax: Matplotlib axes to draw on.
        Xp: Array of x values.
        Xa: Array of y values; its minimum and maximum define the plotted
            range of both axes.
        cmap: Colormap for the histograms.
        labelx: X-axis label.
        labely: Y-axis label.
        plabel: Panel label text.
        vmin: Lower colour limit; ``0`` means "scale automatically".
        vmax: Upper colour limit; ``0`` means "scale automatically".

    Returns:
        The tuple returned by ``ax.hist2d`` for the main histogram.
    """
    Fmx = Xa.max()
    Fmn = Xa.min()
    span = Fmx - Fmn

    # Plot ground truth line
    ax.plot([Fmn, Fmx], [Fmn, Fmx], '--', c='red', linewidth=3)

    # Set labels
    ax.set_xlabel(labelx, fontsize=26)
    ax.set_ylabel(labely, fontsize=26)

    # Plot 2d Histogram. The colour limits go into the norm itself because
    # current matplotlib rejects `vmin`/`vmax` passed next to a norm.
    # A limit of 0 is the "unset" sentinel and maps to None (autoscale).
    norm = LogNorm(vmin=vmin or None, vmax=vmax or None)
    bins = ax.hist2d(Xp,
                     Xa,
                     bins=200,
                     norm=norm,
                     range=[[Fmn, Fmx], [Fmn, Fmx]],
                     cmap=cmap)

    # Annotate with label
    ax.text(0.25 * span + Fmn, 0.06 * span + Fmn, plabel, fontsize=26)

    # Annotate with errors
    PMAE = hdt.calculatemeanabserror(Xa, Xp)
    PRMS = hdt.calculaterootmeansqrerror(Xa, Xp)
    ax.text(0.6 * span + Fmn,
            0.2 * span + Fmn,
            'MAE=' + "{:.3f}".format(PMAE) + '\nRMSE=' + "{:.3f}".format(PRMS),
            fontsize=30,
            bbox={
                'facecolor': 'white',
                'alpha': 0.5,
                'pad': 5
            })

    axins = zoomed_inset_axes(ax, 2., loc=2)

    # The inset covers +/- 10% of the full range around the data means.
    # The x window follows Xp and the y window follows Xa, matching the
    # order in which the data are passed to hist2d (previously swapped).
    sz = 0.1 * span
    x1, x2 = Xp.mean() - sz, Xp.mean() + sz
    y1, y2 = Xa.mean() - sz, Xa.mean() + sz
    axins.hist2d(Xp,
                 Xa,
                 bins=50,
                 range=[[x1, x2], [y1, y2]],
                 norm=LogNorm(),
                 cmap=cmap)
    axins.plot([Xp.mean() - sz, Xp.mean() + sz],
               [Xp.mean() - sz, Xp.mean() + sz],
               '--',
               c='r',
               linewidth=3)

    # sub region of the original image
    axins.set_xlim(x1, x2)
    axins.set_ylim(y1, y2)
    axins.yaxis.tick_right()

    plt.xticks(visible=True)
    plt.yticks(visible=True)

    # Grayscale strings must lie in [0, 1]; the previous "1.5" is rejected
    # by current matplotlib, so the closest valid value is used.
    # NOTE(review): plot_corr_dist uses ec="0.5"; confirm which was meant.
    mark_inset(ax, axins, loc1=1, loc2=3, fc="none", ec="1.0")
    return bins
def plot_error_by_net(self,
                      props,
                      dsets,
                      ntwks=None,
                      fontsize=14,
                      bbox_to_anchor=(1.0, 1.1),
                      figsize=(15.0, 12.0),
                      ncol=1,
                      errortype='MAE',
                      storepath=''):
    """Plot an error metric against network version, one panel per data set.

    A 2 x 3 grid of panels is created and filled in order from ``dsets``
    (data sets beyond the sixth are not drawn). In every panel each property
    gets a line with error bars over all networks except the last one; the
    last network's value is drawn as a dashed horizontal line.

    Args:
        props: Sequence of property descriptors. Each is indexed as
            ``tk[0]`` (legend label) and ``tk[2]`` / ``tk[3]`` (keys of the
            predicted and reference arrays in ``self.fdata[net][dset]``).
        dsets: Data-set keys; one panel each.
        ntwks: Network keys in plotting order. When ``None`` or empty, all
            keys of ``self.fdata`` are used in sorted order.
        fontsize: Base font size for ticks, labels and legend.
        bbox_to_anchor: Legend anchor forwarded to ``ax.legend``.
        figsize: Figure size forwarded to ``plt.subplots``.
        ncol: Number of legend columns.
        errortype: ``'MAE'`` or ``'RMSE'``; also used as the y label.
        storepath: If non-empty, the figure is written to this PDF path
            instead of being shown.

    Raises:
        ValueError: If ``errortype`` is neither ``'MAE'`` nor ``'RMSE'``.

    Side effects:
        Overwrites the global ``xtick.labelsize`` and ``ytick.labelsize``
        entries of ``mpl.rcParams``.
    """
    # Compare by value: the previous `errortype is 'MAE'` identity test only
    # worked when the caller's string happened to be the same object.
    error_functions = {
        'MAE': hdt.calculatemeanabserror,
        'RMSE': hdt.calculaterootmeansqrerror,
    }
    if errortype not in error_functions:
        raise ValueError(
            "errortype must be 'MAE' or 'RMSE', got {!r}".format(errortype))
    metric = error_functions[errortype]

    mpl.rcParams['xtick.labelsize'] = fontsize
    mpl.rcParams['ytick.labelsize'] = fontsize

    colors = cm.viridis(np.linspace(0, 1, len(props)))

    fig, axes = plt.subplots(2, 3, figsize=figsize)

    if ntwks is None or len(ntwks) == 0:
        keys = sorted(self.fdata.keys())
    else:
        keys = ntwks

    # One x position per network except the last, which is drawn as a
    # horizontal reference line instead of a point.
    x_axis = np.arange(len(keys) - 1)

    for ds, ax in zip(dsets, axes.flatten()):
        for tk, c in zip(props, colors):
            heights = []
            spreads = []
            for k in keys:
                predicted = self.fdata[k][ds][tk[2]]
                reference = self.fdata[k][ds][tk[3]]
                # The plotted value uses the last row of the predictions.
                Nn = predicted.shape[0] - 1
                heights.append(metric(predicted[Nn, :], reference))
                # Spread of the per-row metric over all rows.
                spreads.append(np.std(metric(predicted, reference, axis=1)))

            ax.plot(x_axis,
                    heights[:-1],
                    '-o',
                    color=c,
                    linewidth=5,
                    label=tk[0])
            ax.errorbar(x_axis,
                        heights[:-1],
                        yerr=spreads[:-1],
                        fmt='.',
                        capsize=8,
                        elinewidth=3,
                        color=c,
                        ecolor=c,
                        markeredgewidth=2)
            # Dashed line marking the value of the last network.
            ax.plot([-0.1, len(heights[:-1]) - 1 + 0.1],
                    [heights[-1], heights[-1]],
                    '--',
                    color=c,
                    linewidth=5)

        ax.legend(fontsize=fontsize,
                  bbox_to_anchor=bbox_to_anchor,
                  ncol=ncol)
        ax.set_title(ds, fontsize=fontsize + 2)
        ax.set_xticks(x_axis)
        ax.set_xticklabels([d for d in keys[:-1]])
        ax.set_ylabel(errortype, fontsize=fontsize)
        ax.set_xlabel('Active Learning Version', fontsize=fontsize)

    if storepath:
        # The context manager closes the PDF even if saving fails.
        with PdfPages(storepath) as pp:
            pp.savefig(fig)
    else:
        plt.show()
print(pms) params = [] for p in nnr.coefs_: params.append(p.flatten()) Np = np.concatenate(params).size print(Np) print('Predicting...') P = nnr.predict(X_train) P = scaler.inverse_transform(P) A = scaler.inverse_transform(y_train.flatten()) print(hdt.calculaterootmeansqrerror(P, A)) print(hdt.calculatemeanabserror(P, A)) #plt.plot(A,A, color='black') #plt.scatter(P,A,color='blue') #plt.show() print('Predicting...') P = nnr.predict(X_test) P = scaler.inverse_transform(P) A = scaler.inverse_transform(y_test.flatten()) print('RMSE:', hdt.calculaterootmeansqrerror(P, A)) print('MAE: ', hdt.calculatemeanabserror(P, A)) print('r^2:', metrics.r2_score(A, P, sample_weight=None, multioutput=None))