Example #1
0
    def generate_fullset_peratom_errors(self, ntkey, tslist):
        """Return per-atom error metrics pooled over several test sets.

        Parameters
        ----------
        ntkey
            Key into ``self.fdata`` selecting the network entry.
        tslist
            Iterable of test-set keys.  A falsy value (``None``, empty list)
            selects every test set stored under ``self.fdata[ntkey]``.

        Returns
        -------
        dict
            Four entries keyed by ``names[0]``, ``names[2]``, ``names[4]`` and
            ``names[6]`` (``names`` is defined outside this method).  In that
            order they hold the MAE and RMSE of ``Eani / Na`` against
            ``Edft / Na`` and the MAE and RMSE of ``dEani / Na2`` against
            ``dEdft / Na2``, each multiplied by 1000.
        """
        data = self.fdata[ntkey]
        tskeys = list(tslist) if tslist else list(data.keys())

        # Index of the last row of the first test set's 'Eani' array; that
        # same row index is used for every test set below.
        # NOTE(review): presumably the last row is the ensemble prediction --
        # confirm against the code that fills self.fdata.
        Nn = data[tskeys[0]]['Eani'].shape[0] - 1

        def pooled(prop, natoms, row=None):
            """Concatenate ``prop / natoms`` over all selected test sets."""
            return np.concatenate([
                (data[tskey][prop] if row is None else
                 data[tskey][prop][row, :]) / data[tskey][natoms]
                for tskey in tskeys
            ])

        # Build each pooled array once; both the MAE and the RMSE reuse it.
        eani = pooled('Eani', 'Na', Nn)
        edft = pooled('Edft', 'Na')
        deani = pooled('dEani', 'Na2', Nn)
        dedft = pooled('dEdft', 'Na2')

        # The factor 1000 is presumably a unit conversion -- TODO confirm.
        return {
            names[0]: 1000 * hdt.calculatemeanabserror(eani, edft),
            names[2]: 1000 * hdt.calculaterootmeansqrerror(eani, edft),
            names[4]: 1000 * hdt.calculatemeanabserror(deani, dedft),
            names[6]: 1000 * hdt.calculaterootmeansqrerror(deani, dedft),
        }
def Ecorrplot(ax1, Eact, Ecmp, mlbl, color, lab=False):
    """Draw a correlation scatter of ``Ecmp`` against ``Eact`` on ``ax1``.

    Parameters
    ----------
    ax1
        Matplotlib axes to draw on.
    Eact
        Reference values; their min/max define the diagonal and both axis
        limits.
    Ecmp
        Values compared against ``Eact``.
    mlbl
        Text prepended to the RMSE in the scatter's legend label.
    color
        Colour of the scatter markers.
    lab
        When true, the black diagonal line gets the legend label ``'DFT'``.
    """
    mn = Eact.min()
    mx = Eact.max()

    # Diagonal y = x spanning the reference range.
    diag_style = {'color': 'black', 'linewidth': 5}
    if lab:
        diag_style['label'] = 'DFT'
    ax1.plot((mn, mx), (mn, mx), **diag_style)

    rmse = gt.calculaterootmeansqrerror(Eact, Ecmp)
    ax1.scatter(Eact,
                Ecmp,
                marker=r'o',
                color=color,
                label=mlbl + ' RMSE: ' + "{:.3f}".format(rmse) + ' kcal/mol',
                linewidth=1)

    ax1.set_xlim([mn, mx])
    ax1.set_ylim([mn, mx])

    # Raw strings: '\D' is not a valid escape sequence in a normal literal.
    ax1.set_ylabel(r'$\Delta E_{cmp}$ (kcal/mol)')
    ax1.set_xlabel(r'$\Delta E_{ref}$ (kcal/mol)')
    ax1.legend(bbox_to_anchor=(0.01, 0.99),
               loc=2,
               borderaxespad=0.,
               fontsize=16)
Example #3
0
def plot_irc_data(axes, file, rcf, title, ntwl, cnstfile, saefile, dir, idx):
    """Plot a reference energy profile and one model profile per network.

    Parameters
    ----------
    axes
        Matplotlib axes to draw on.
    file
        Path passed to ``hdt.readncdat`` to obtain coordinates, atom types
        and reference energies.
    rcf
        Path passed to ``np.load``; column 1 of its ``'x'`` entry is used as
        the x axis.
    title
        Axes title; drawn green when ``idx < 6`` and red otherwise.
    ntwl
        Sequence of network descriptors; ``nt[0]`` is a path fragment placed
        between the module-level ``rcdir`` and ``'networks/'``, ``nt[1]`` is
        the legend text.
    cnstfile, saefile
        File names appended to ``dir``.
    dir
        Directory prefix for ``cnstfile`` and ``saefile`` (the name shadows
        the builtin but is kept for caller compatibility).
    idx
        Integer controlling the title colour.

    Returns
    -------
    tuple of numpy.ndarray
        ``(terr, derr, berr)``, one value per network: RMSE against the
        reference, absolute difference of the |first - last| energy gaps,
        and absolute difference of the profile maxima.
    """
    xyz, typ, Eact = hdt.readncdat(file, np.float32)

    # Read the x-axis values once instead of on every use.
    rcoord = np.load(rcf)['x'][:, 1]

    # Express the reference relative to its first point, scaled by
    # hdt.hatokcal.
    Eact = hdt.hatokcal * (Eact - Eact[0])

    # Plot reference results
    axes.plot(rcoord, Eact, color='black', linewidth=3)

    # Plot ANI results
    color = cm.rainbow(np.linspace(0, 1, len(ntwl)))
    terr = np.zeros(len(ntwl))
    derr = np.zeros(len(ntwl))
    berr = np.zeros(len(ntwl))
    for i, (nt, c) in enumerate(zip(ntwl, color)):
        # NOTE: rcdir is a module-level global, not a parameter.
        ncr = pync.conformers(dir + cnstfile, dir + saefile,
                              rcdir + nt[0] + 'networks/', 0)

        # Set the conformers in NeuroChem
        ncr.setConformers(confs=xyz, types=list(typ))

        # Model energies, shifted and scaled like the reference.
        E1 = ncr.energy()
        E1 = hdt.hatokcal * (E1 - E1[0])

        errn = hdt.calculaterootmeansqrerror(E1, Eact)

        terr[i] = errn
        derr[i] = np.abs(
            np.abs((E1[0] - E1[-1])) - np.abs((Eact[0] - Eact[-1])))
        berr[i] = np.abs(E1.max() - Eact.max())

        # '--' only: the colour comes from the keyword, so a colour code in
        # the format string would be redundant and triggers a warning.
        axes.plot(rcoord,
                  E1,
                  '--',
                  color=c,
                  label="[" + nt[1] + "]: " + "{:.2f}".format(errn),
                  linewidth=2)

    axes.set_xlim([rcoord.min(), rcoord.max()])
    axes.legend(loc="upper left", fontsize=12)
    axes.set_title(title,
                   color='green' if idx < 6 else 'red',
                   fontdict={'weight': 'bold'})
    return terr, derr, berr
Eotr3 = gt.hatokcal * Eotr3

Emax = 300.0

# Apply setmaxE (defined elsewhere) to every energy set.  All calls receive
# the Eact that was current before this statement; Eact itself is processed
# last and only rebound once the whole list has been built.
Ecmp1, Ecmp2, Ecmp3, Ecmp4, Ecmp5, Eotr1, Eotr2, Eotr3, Eact = [
    setmaxE(Eact, _erg, Emax)
    for _erg in (Ecmp1, Ecmp2, Ecmp3, Ecmp4, Ecmp5, Eotr1, Eotr2, Eotr3, Eact)
]

print('Act count: ' + str(Eact.shape[0]))

# RMSE of each set against the reference: rmse1-3 are the Eotr sets,
# rmse4-8 the Ecmp sets.
rmse1, rmse2, rmse3, rmse4, rmse5, rmse6, rmse7, rmse8 = [
    gt.calculaterootmeansqrerror(Eact, _erg)
    for _erg in (Eotr1, Eotr2, Eotr3, Ecmp1, Ecmp2, Ecmp3, Ecmp4, Ecmp5)
]

#plt.scatter(IDX, Eact, marker='o' , color='black',  linewidth=3)

# Spearman rank correlation of four of the sets with the reference.
for _label, _erg in (
        ("Spearman corr. DFTB:  ", Eotr1),
        ("Spearman corr. PM6:   ", Eotr2),
        ("Spearman corr. AM1:   ", Eotr3),
        ("Spearman corr. ANI-1: ", Ecmp1),
):
    print(_label + "{:.7f}".format(st.spearmanr(_erg, Eact)[0]))
Example #5
0
rcdir = '/home/jujuman/Research/ANI-DATASET/RXN1_TNET/training/rxn1to6/ani_benz_rxn_ntwk/'
cnstfile = '../../rHCNO-4.6A_16-3.1A_a4-8.params'
saefile = '../../sae_6-31gd.dat'

ncr = pync.conformers(rcdir + cnstfile, rcdir + saefile, rcdir + 'networks/',
                      1)

# Set the conformers in NeuroChem
ncr.setConformers(confs=xyz, types=list(typ))

# Compute Energies of Conformations
E1 = ncr.energy()

# Reverse both profiles so they run in the opposite order; the shift to the
# first point happens in the plot calls below.
E1 = E1[::-1]
Ea = Ea[::-1]

print(hdt.calculaterootmeansqrerror(hdt.hatokcal * E1, hdt.hatokcal * Ea))

plt.plot(Rc['x'][:, 1], hdt.hatokcal * (E1 - E1[0]), color='blue', linewidth=3)
# '--' only: colour is given by keyword, so the former 'r' in the format
# string was redundant and made matplotlib warn.
plt.plot(Rc['x'][:, 1],
         hdt.hatokcal * (Ea - Ea[0]),
         '--',
         color='black',
         linewidth=3)

plt.show()
def graphEdiffDelta2D(ax, title, data1, data2, Na, min, max):
    """Show the absolute difference of two element-difference maps on ``ax``.

    Parameters
    ----------
    ax
        Matplotlib axes to draw on.
    title
        Axes title.
    data1, data2
        Inputs passed to ``gt.calculateelementdiff2D``; ``data1.shape[0]``
        sets the side length of the plotted square matrix.
    Na
        Number of atoms, used to report the RMSE per atom.
    min, max
        Colour-scale limits (the names shadow the builtins but are kept for
        caller compatibility).

    Returns
    -------
    The image returned by ``ax.matshow``.
    """
    x, y, z, d = gt.calculateelementdiff2D(data1)
    _, _, z2, d2 = gt.calculateelementdiff2D(data2)

    RMSE = gt.calculaterootmeansqrerror(d, d2) / float(Na)

    print('Number of atoms: ' + str(Na))
    print('RMSE: ' + str(RMSE) + ' kcal/mol/atom')
    print('RMSE: ' + str(float(Na) * RMSE) + ' kcal/mol')

    z = np.abs(z - z2)

    C = data1.shape[0]

    # Zero-initialised so any cell the loop below does not write holds a
    # defined value instead of uninitialised memory.
    mat = np.zeros((C, C), dtype=float)

    for i in x:
        for j in y:
            I = int(i)
            J = int(j)
            mat[J, I] = z[J + I * C]

    mat = np.transpose(mat)
    mat = flipmat(mat, C)

    # Show mat
    im = ax.matshow(mat, vmin=min, vmax=max)

    th = ax.set_title(title, fontsize=16)
    th.set_position([0.5, 1.005])

    # Re-draw the matrix as RGBA with the cells below the anti-diagonal
    # painted white.
    cmap = plt.cm.jet
    norm = plt.Normalize(min, max)
    rgba = cmap(norm(mat))
    for i in range(C):
        rgba[range(i + 1, C), C - i - 1, :3] = 1, 1, 1
    ax.imshow(rgba, interpolation='nearest')

    # Plot center line
    # NOTE(review): the second y value uses x.max(); for a square map this
    # presumably equals y.max() -- confirm.
    ax.plot([x.max() + 1 - 0.5, x.min() - 1 + 0.5],
            [y.min() - 0.5, x.max() + 0.5],
            '--',
            color='red',
            linewidth=4,
            alpha=0.8)

    # Set Limits
    ax.set_xlim([-0.06 * x.max(), x.max() + 0.06 * x.max()])
    ax.set_ylim([-0.06 * y.max(), y.max() + 0.06 * y.max()])

    for side in ("top", "bottom", "left", "right"):
        ax.spines[side].set_visible(False)

    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

    return im
Example #7
0
def plot_corr_dist(Xa, Xp, inset=True, figsize=(13, 10)):
    """Show a log-scaled 2-D histogram of ``Xp`` against ``Xa``.

    Parameters
    ----------
    Xa
        Reference values (x axis); their min/max define the plotted range.
    Xp
        Compared values (y axis).
    inset
        When true, add a zoomed inset covering one sixth of the range and a
        histogram of ``Xa - Xp``.
    figsize
        Figure size passed to ``plt.subplots``.

    The function sets global tick label sizes through ``mpl.rcParams`` and
    ends by calling ``plt.show()``; it returns nothing.
    """
    Fmx = Xa.max()
    Fmn = Xa.min()

    label_size = 14
    mpl.rcParams['xtick.labelsize'] = label_size
    mpl.rcParams['ytick.labelsize'] = label_size

    fig, ax = plt.subplots(figsize=figsize)

    # Plot ground truth line
    ax.plot([Fmn, Fmx], [Fmn, Fmx], '--', c='r', linewidth=3)

    # Set labels
    ax.set_xlabel('$Q_{dft}$' + r' $(e \times {10}^{-3})$', fontsize=22)
    ax.set_ylabel('$Q_{ani}$' + r' $(e \times {10}^{-3})$', fontsize=22)

    cmap = mpl.cm.viridis

    # Plot 2d Histogram
    bins = ax.hist2d(Xa,
                     Xp,
                     bins=200,
                     norm=LogNorm(),
                     range=[[Fmn, Fmx], [Fmn, Fmx]],
                     cmap=cmap)

    # Build color bar from the histogram image (last element of hist2d's
    # return value).
    cb1 = fig.colorbar(bins[-1], cmap=cmap)
    cb1.set_label('Count', fontsize=16)

    # Annotate with errors
    PMAE = hdn.calculatemeanabserror(Xa, Xp)
    PRMS = hdn.calculaterootmeansqrerror(Xa, Xp)
    span = Fmx - Fmn
    ax.text(0.75 * span + Fmn,
            0.43 * span + Fmn,
            'MAE=' + "{:.1f}".format(PMAE) + '\nRMSE=' + "{:.1f}".format(PRMS),
            fontsize=20,
            bbox={
                'facecolor': 'white',
                'alpha': 0.5,
                'pad': 5
            })

    if inset:
        axins = zoomed_inset_axes(ax, 2.2, loc=2)

        # The inset covers the full range divided by sz on both axes.
        sz = 6
        x1, x2, y1, y2 = Fmn / sz, Fmx / sz, Fmn / sz, Fmx / sz
        axins.hist2d(Xa,
                     Xp,
                     bins=50,
                     range=[[x1, x2], [y1, y2]],
                     norm=LogNorm(),
                     cmap=cmap)
        axins.plot([Xa.min(), Xa.max()], [Xa.min(), Xa.max()],
                   '--',
                   c='r',
                   linewidth=3)

        # sub region of the original image
        axins.set_xlim(x1, x2)
        axins.set_ylim(y1, y2)
        axins.yaxis.tick_right()

        plt.xticks(visible=True)
        plt.yticks(visible=True)

        mark_inset(ax, axins, loc1=1, loc2=3, fc="none", ec="0.5")

        # Histogram of the differences within mean +/- 4 standard deviations.
        Ferr = Xa - Xp
        std = np.std(Ferr)
        men = np.mean(Ferr)
        axh = plt.axes([.49, .14, .235, .235])
        # 'density' replaces the removed 'normed' keyword of Axes.hist.
        axh.hist(Ferr,
                 bins=75,
                 range=[men - 4 * std, men + 4 * std],
                 density=True)
        axh.set_title('Difference distribution')

    plt.show()
Example #8
0
def produce_scan(ax,title,xlabel,cnstfile,saefile,nnfdir,dtdir,dt1,dt2,dt3,smin,smax,iscale,ishift):
    """Plot a scan: reference energies, network energies and two others.

    Parameters
    ----------
    ax
        Matplotlib axes to draw on.
    title, xlabel
        Axes title and x-axis label.
    cnstfile, saefile, nnfdir
        Arguments passed to ``pync.conformers``.
    dtdir
        Directory prefix for the three data files.
    dt1, dt2, dt3
        Data file names read with ``gt.readncdat``.  ``dt1`` supplies the
        geometries and the reference energies (legend 'DFT'); the energies
        of ``dt2`` and ``dt3`` are plotted with the legends 'DFTB' and 'PM6'.
    smin, smax
        Slice bounds applied to every series before plotting.
    iscale, ishift
        The x values are ``index * iscale + ishift``.

    Every series is scaled by ``gt.hatokcal``, sliced to ``[smin:smax]`` and
    shifted so its minimum is zero.  Nothing is returned.
    """
    xyz, typ, Eact = gt.readncdat(dtdir + dt1,np.float32)
    # Only the energies of the second and third files are used.
    _, _, Eact2 = gt.readncdat(dtdir + dt2)
    _, _, Eact3 = gt.readncdat(dtdir + dt3)

    # Construct pyNeuroChem classes
    nc1 = pync.conformers(cnstfile, saefile, nnfdir, 0)

    # Set the conformers in NeuroChem
    nc1.setConformers(confs=xyz, types=list(typ))

    # Print some data from the NeuroChem
    print('1) Number of Atoms Loaded: ' + str(nc1.getNumAtoms()))
    print('1) Number of Confs Loaded: ' + str(nc1.getNumConfs()))

    # Compute and time the network energies
    print('Computing energies 1...')
    _t1b = tm.time()
    Ecmp1 = nc1.energy()
    print(Ecmp1)
    print('Computation complete 1. Time: ' + "{:.4f}".format((tm.time() - _t1b) * 1000.0) + 'ms')

    n = smin
    m = smax
    Ecmp1 = gt.hatokcal * Ecmp1
    Eact  = gt.hatokcal * Eact
    Eact2 = gt.hatokcal * Eact2
    Eact3 = gt.hatokcal * Eact3

    # x values are built from the full length and then sliced like the data.
    IDX = np.arange(0, Eact.shape[0], 1, dtype=float) * iscale + ishift

    IDX = IDX[n:m]
    Eact = Eact[n:m]
    Eact2 = Eact2[n:m]
    Eact3 = Eact3[n:m]
    Ecmp1 = Ecmp1[n:m]

    # Shift every series so its own minimum is zero.
    Ecmp1 = Ecmp1 - Ecmp1.min()
    Eact  = Eact  - Eact.min()
    Eact2 = Eact2 - Eact2.min()
    Eact3 = Eact3 - Eact3.min()

    rmse1 = gt.calculaterootmeansqrerror(Eact, Ecmp1)
    rmse3 = gt.calculaterootmeansqrerror(Eact, Eact2)
    rmse4 = gt.calculaterootmeansqrerror(Eact, Eact3)

    print("Spearman corr. 1: " + "{:.3f}".format(st.spearmanr(Ecmp1, Eact)[0]))
    print("Spearman corr. 2: " + "{:.3f}".format(st.spearmanr(Eact2, Eact)[0]))
    print("Spearman corr. 3: " + "{:.3f}".format(st.spearmanr(Eact3, Eact)[0]))

    ax.plot(IDX, Eact, '-', marker=r'o', color='black', label='DFT',
             linewidth=2, markersize=7)
    ax.plot(IDX, Ecmp1, ':', marker=r'D', color='red', label='ANI-1 RMSE: ' + gt.to_precision(rmse1,2) + ' kcal/mol',
             linewidth=2, markersize=5)
    ax.plot(IDX, Eact2, ':', marker=r'v', color='blue', label='DFTB  RMSE: ' + gt.to_precision(rmse3,2) + ' kcal/mol',
             linewidth=2, markersize=5)
    ax.plot(IDX, Eact3, ':', marker=r'*', color='orange', label='PM6   RMSE: ' + gt.to_precision(rmse4,2) + ' kcal/mol',
             linewidth=2, markersize=7)

    th = ax.set_title(title,fontsize=16)
    th.set_position([0.5,1.005])

    # Set Limits
    ax.set_xlim([ IDX.min(),IDX.max()])
    ax.set_ylim([Eact.min()-1.0,Eact.max()+1.0])

    # Raw string: '\D' is not a valid escape sequence in a normal literal.
    ax.set_ylabel(r'$\Delta$E calculated (kcal/mol)')
    ax.set_xlabel(xlabel)
    ax.legend(bbox_to_anchor=(0.2, 0.98), loc=2, borderaxespad=0., fontsize=14)
Example #9
0
# Module-level accumulators.  NOTE(review): presumably filled as a side
# effect of plot_irc below, and Ec1 is presumably initialised above this
# fragment -- confirm.
Ec2 = []
Ea = []
rmp1 = []
bar1 = []
rmp2 = []
bar2 = []
# One subplot per file, laid out row-major on a grid with X columns.
for i, f in enumerate(files):
    plot_irc(axarr[int(np.floor(i / X)), i % X], i, d, f)

plt.show()

bar1 = np.array(bar1)  # Barrier 1
bar2 = np.array(bar2)  # Barrier 2

rmp1 = np.array(rmp1)  # Reactant product 1
rmp2 = np.array(rmp2)  # Reactant product 2

# Flatten the per-file arrays into one array each for the pooled RMSE.
Ec1 = np.concatenate(Ec1)
Ec2 = np.concatenate(Ec2)
Ea = np.concatenate(Ea)

#plt.suptitle(str(len(files)) + " Diels-Alder reactions (x axis=$R_c$;y-axis=relative E [kcal/mol])\n"+cts+"\n"+cds+"\n"+cbs,fontsize=14,fontweight='bold',y=0.99)

# Report the mean of each accumulated quantity (sum divided by size) and the
# RMSE of the pooled profiles against the pooled reference.
print('Barrier   - ANI retrain:',
      bar1.sum() / bar1.size, 'Original ANI:',
      bar2.sum() / bar2.size)
print('Reac/prod - ANI retrain:',
      rmp1.sum() / rmp1.size, 'Original ANI:',
      rmp2.sum() / rmp2.size)
print('IRC RMSE  - ANI Retrain:', hdt.calculaterootmeansqrerror(Ec1, Ea),
      'Original ANI:', hdt.calculaterootmeansqrerror(Ec2, Ea))
# Print each reference value next to the corresponding computed value.
for j, k in zip(Eact, Ecmp1):
    print('  ', j, ':', k)

# t1 is divided by 1000 and reported in seconds, i.e. it is presumably in
# milliseconds -- TODO confirm where t1 is set.
print("Time 1: " + "{:.4f}".format(t1 / 1000.0) + 's')

Ecmp1 = gt.hatokcal * Ecmp1
Eact = gt.hatokcal * Eact

# setmaxE is defined elsewhere.  Eact is processed last so that the call for
# Ecmp1 still receives the unprocessed reference.
Emax = 100.0
Ecmp1 = setmaxE(Eact, Ecmp1, Emax)
Eact = setmaxE(Eact, Eact, Emax)

print('NMOL: ', Ecmp1.shape[0])

rmse2 = gt.calculaterootmeansqrerror(Eact, Ecmp1)

mx = Eact.max()
mn = Eact.min()

# NOTE(review): the commented lines below are leftover merge-conflict
# markers; both sides are commented out and have no effect.
#<<<<<<< HEAD
#plt.scatter(IDX, Eact, marker='o' , color='black',  linewidth=3)

#=======
#print ( "Spearman corr. DFTB: " + "{:.3f}".format(st.spearmanr(Eotr,Eact)[0]) )
#>>>>>>> 1ec96245cdd1c6d1647ffeed1a6bec3a4b7e4bb4
print("Spearman corr. TGM 08: " +
      "{:.3f}".format(st.spearmanr(Ecmp1, Eact)[0]))

# Linear regression of the computed values on the reference values.
slope2, intercept2, r_value2, p_value2, std_err2 = st.linregress(Eact, Ecmp1)
Example #11
0
# Show the regressor's configuration.
pms = nnr.get_params()
print(pms)

# Count the entries of all arrays in nnr.coefs_ by flattening and
# concatenating them.
params = []
for p in nnr.coefs_:
    params.append(p.flatten())

Np = np.concatenate(params).size
print(Np)

# Training-set evaluation: predictions and targets are both mapped back
# through scaler.inverse_transform before the errors are computed.
print('Predicting...')
P = nnr.predict(X_train)
P = scaler.inverse_transform(P)
A = scaler.inverse_transform(y_train.flatten())

print(hdt.calculaterootmeansqrerror(P, A))
print(hdt.calculatemeanabserror(P, A))

#plt.plot(A,A, color='black')
#plt.scatter(P,A,color='blue')
#plt.show()

# Test-set evaluation; P and A are rebound to the test predictions/targets.
print('Predicting...')
P = nnr.predict(X_test)
P = scaler.inverse_transform(P)
A = scaler.inverse_transform(y_test.flatten())

print('RMSE:', hdt.calculaterootmeansqrerror(P, A))
print('MAE: ', hdt.calculatemeanabserror(P, A))

print('r^2:', metrics.r2_score(A, P, sample_weight=None, multioutput=None))
Example #12
0
# Reference energies with the value from hdt.compute_sae subtracted.
sae = hdt.compute_sae(saefile, scan[1])
serg = scan[2] - sae

# Load the same conformers into both NeuroChem instances.
for _model in (nc, nc2):
    _model.setConformers(confs=scan[0], types=list(scan[1]))

# x axis: one point per energy, starting at 0.6 with a spacing of 0.05.
x = 0.05 * np.arange(serg.shape[0], dtype=np.float64) + 0.6
print(len(x))

# Buckingham potential evaluated with the stored parameters.
popt = np.load('mp_ani_params_test.npz')['param']
fsEc = hdt.buckingham_pot(sdat, *popt)
#fsEc = hdt.src_pot(sdat)

aerg = nc.energy() + fsEc - sae
a2erg = nc2.energy() - sae

frmse = hdt.calculaterootmeansqrerror(serg, fsEc)

# Draw the four curves in a fixed order so the legend order is unchanged.
for _curve, _colour, _label in (
        (serg, 'black', 'QM'),
        (fsEc, 'red', 'SRC'),
        (aerg, 'green', 'Corrected'),
        (a2erg, 'blue', 'Original'),
):
    plt.plot(x, _curve, color=_colour, label=_label)

plt.xlim(0, 5)

plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.show()
Example #13
0
# Energies from the network instance that is current at this point.
ani2 = hdn.hatokcal * nc.energy()

anipath = '/home/jujuman/Research/GDB-11-wB97X-6-31gd/ANI-c08e-ntwk_newtrain/'
cnstfile = anipath + '/rHCNO-4.6A_16-3.1A_a4-8.params'
saefile = anipath + '/sae_6-31gd.dat'
nnfdir = anipath + '/networks/'

# Construct pyNeuroChem class (rebinds nc to the network under anipath)
nc = pync.conformers(cnstfile, saefile, nnfdir, 0)

# Set the conformers in NeuroChem
nc.setConformers(confs=xyz, types=dataf[2])

ani3 = hdn.hatokcal * nc.energy()

# RMSEs are taken before the series are shifted to their minima below.
rmse1 = hdn.calculaterootmeansqrerror(ani1, eng)
rmse2 = hdn.calculaterootmeansqrerror(ani2, eng)
rmse3 = hdn.calculaterootmeansqrerror(ani3, eng)

# x axis: distance of the atom at index 3 from its position in the first
# conformer.
xv = xyz[:, 3, :] - xyz[0, 3, :]
x = np.linalg.norm(xv, axis=1)

eng = eng - eng.min()
ani1 = ani1 - ani1.min()
ani2 = ani2 - ani2.min()
ani3 = ani3 - ani3.min()

f, axarr = plt.subplots(2, 2)

# '-' only: colour is given by keyword, so the former 'r' in the format
# string was redundant and made matplotlib warn.
axarr[0, 0].plot(x, eng, '-', color='black', label='DFT', linewidth=3)
axarr[0, 1].plot(x, eng, '-', color='black', label='DFT', linewidth=3)
Example #14
0
    '/home/jujuman/Dropbox/ChemSciencePaper.AER/JustinsDocuments/ACS_april_2017/DipeptidePhiPsi/data.xyz',
    xyz, list(data[1]))

# Read the second data file; element 2 of the result is used as the DFT
# energies below.
data2 = hdt.readncdat(
    '/home/jujuman/Dropbox/ChemSciencePaper.AER/JustinsDocuments/ACS_april_2017/DipeptidePhiPsi/data.dat',
    type=np.float32)

Eact = np.array(Eact)
Edft = data2[2]

# Shift each energy set to its own minimum, scale by hdt.hatokcal and reshape
# to a square grid with len(x) points per side.
z = np.array(hdt.hatokcal * (Eact - Eact.min()),
             dtype=np.float32).reshape(x.shape[0], x.shape[0])
z2 = np.array(hdt.hatokcal * (Edft - Edft.min()),
              dtype=np.float32).reshape(x.shape[0], x.shape[0])

rmse = hdt.calculaterootmeansqrerror(z, z2)
print('RMSE: ', rmse)

# Bivariate splines over the (x, x) grid for both surfaces and for their
# absolute difference.
Spline1 = scipy.interpolate.RectBivariateSpline(x, x, z)
Spline2 = scipy.interpolate.RectBivariateSpline(x, x, z2)
Spline3 = scipy.interpolate.RectBivariateSpline(x, x, abs(z - z2))
# 200 evenly spaced sample points from -180 to 180.
XY_New = np.linspace(-180, 180, 200)

# Three panels side by side with shared axes.
f, ax = plt.subplots(nrows=1, ncols=3, sharex=True, sharey=True)
f.set_size_inches(12, 6)

# Largest and smallest value across both surfaces.
maxi = np.concatenate([z, z2]).max()
mini = np.concatenate([z, z2]).min()

font = {'family': 'Bitstream Vera Sans', 'weight': 'normal', 'size': 18}
def plot_irc_data(axes, file, title, ntwl, cnstfile, saefile, dir, trained):
    """Plot a reference IRC profile and one model profile per network.

    Parameters
    ----------
    axes
        Matplotlib axes to draw on.
    file
        Path passed to ``pyg.read_irc``.
    title
        Axes title; drawn green when ``trained`` is truthy, red otherwise.
    ntwl
        Sequence of network descriptors; ``nt[0]`` is a path fragment used
        for the parameter, SAE and network locations.
    cnstfile, saefile
        File names appended to ``dir + nt[0]``.
    dir
        Directory prefix (the name shadows the builtin but is kept for
        caller compatibility).
    trained
        Flag selecting the title colour.

    Returns
    -------
    tuple of numpy.ndarray
        ``(terr, derr, berr)``, one value per network: RMSE against the
        reference, absolute difference of the |first - last| energy gaps,
        and absolute difference of the profile maxima.
    """
    Eact, xyz, typ, Rc = pyg.read_irc(file)
    # Use column 1 of Rc as the x axis, in reversed order.
    Rc = Rc[:, 1][::-1]

    print(Eact.shape, Rc.shape, xyz.shape)

    # Express the reference relative to its last point, scaled by
    # hdt.hatokcal.
    Eact = hdt.hatokcal * (Eact - Eact[-1])

    # Plot reference results
    axes.scatter(Rc, Eact, color='black', linewidth=3)

    # Plot ANI results
    color = cm.rainbow(np.linspace(0, 1, len(ntwl)))
    terr = np.zeros(len(ntwl))
    derr = np.zeros(len(ntwl))
    berr = np.zeros(len(ntwl))
    for i, (nt, c) in enumerate(zip(ntwl, color)):
        # NOTE: rcdir is a module-level global, not a parameter.
        ncr = pync.conformers(dir + nt[0] + cnstfile, dir + nt[0] + saefile,
                              rcdir + nt[0] + 'networks/', 0, True)

        # Set the conformers in NeuroChem
        ncr.setConformers(confs=xyz, types=list(typ))

        # Model energies, shifted and scaled like the reference.
        E1 = ncr.energy()
        E1 = hdt.hatokcal * (E1 - E1[-1])

        errn = hdt.calculaterootmeansqrerror(E1, Eact)

        terr[i] = errn
        derr[i] = np.abs(
            np.abs((E1[0] - E1[-1])) - np.abs((Eact[0] - Eact[-1])))
        berr[i] = np.abs(E1.max() - Eact.max())

        # '--' only: the colour comes from the keyword, so a colour code in
        # the format string would be redundant and triggers a warning.
        axes.plot(Rc,
                  E1,
                  '--',
                  color=c,
                  label="[" + str(i) + "]: " + "{:.1f}".format(berr[i]),
                  linewidth=2)

    axes.legend(loc="upper left", fontsize=7)
    axes.set_title(title,
                   color='green' if trained else 'red',
                   fontdict={'weight': 'bold'},
                   x=0.83,
                   y=0.70)
    return terr, derr, berr
Example #16
0
 def generate_rmserror(self, ntkey, tskey, prop1, prop2):
     """Return the RMSE between the last row of ``prop1`` and ``prop2``.

     Both properties are read from ``self.fdata[ntkey][tskey]``; ``prop1``
     is indexed as a 2-D array and only its final row is compared.
     """
     predicted = self.fdata[ntkey][tskey][prop1]
     last_row = predicted.shape[0] - 1
     reference = self.fdata[ntkey][tskey][prop2]
     return hdt.calculaterootmeansqrerror(predicted[last_row, :], reference)
Example #17
0
    def plot_bar_propsbynet(self,
                            props,
                            dsets,
                            ntwks=None,
                            fontsize=14,
                            bbox_to_anchor=(1.0, 1.1),
                            figsize=(15.0, 12.0),
                            ncol=1,
                            errortype='MAE'):
        """Draw one grouped bar chart per property, one bar per network.

        Parameters
        ----------
        props
            Sequence of property descriptors ``p``: ``p[0]`` is the subplot
            title, ``p[1]`` the y label, ``p[2]`` and ``p[3]`` the keys of
            the predicted and reference arrays in ``self.fdata[net][dset]``,
            and ``p[4]`` the y limits.
        dsets
            Data-set keys; one bar group per entry.
        ntwks
            Network keys to plot.  ``None`` or an empty sequence selects all
            keys of ``self.fdata`` in sorted order.
        fontsize
            Font size for tick labels, axis labels, titles and legend.
        bbox_to_anchor, ncol
            Passed to the legend of the first subplot.
        figsize
            Accepted for interface compatibility.
            NOTE(review): not used; the figure size is fixed at (30, 24).
        errortype
            ``'MAE'`` or ``'RMSE'``.

        The method sets global tick label sizes through ``mpl.rcParams`` and
        ends by calling ``plt.show()``; it returns nothing.
        """
        # Compare by value: "is" on a string literal tests object identity
        # and is not guaranteed to be true for equal strings.
        if errortype == 'MAE':
            errfunc = hdt.calculatemeanabserror
        elif errortype == 'RMSE':
            errfunc = hdt.calculaterootmeansqrerror
        else:
            # As before, an unknown error type produces no bar heights.
            errfunc = None

        ind = np.arange(len(dsets))  # the x locations for the groups
        rects = []

        mpl.rcParams['xtick.labelsize'] = fontsize
        mpl.rcParams['ytick.labelsize'] = fontsize

        # squeeze=False keeps axes a 2-D array so flatten() also works when
        # there is only one property.
        fig, axes = plt.subplots(len(props),
                                 1,
                                 figsize=(30.0, 24.0),
                                 squeeze=False)

        if ntwks is None or len(ntwks) == 0:
            keys = sorted(self.fdata.keys())
        else:
            keys = ntwks

        for j, (p, ax) in enumerate(zip(props, axes.flatten())):
            width = 0.85 / len(keys)  # the width of the bars

            colors = cm.viridis(np.linspace(0, 1, len(keys)))
            # NOTE(review): compares the property index with the number of
            # networks; with colors == 'r' the zip below yields only the
            # first network.  Kept as in the original -- confirm intent.
            if j == len(keys) - 1:
                colors = 'r'

            for i, (k, c) in enumerate(zip(keys, colors)):
                heights = []
                spreads = []

                for tk in dsets:
                    if errfunc is None:
                        continue
                    predicted = self.fdata[k][tk][p[2]]
                    reference = self.fdata[k][tk][p[3]]
                    # NOTE(review): row 5 is hard-coded here -- confirm it is
                    # the intended row of the predicted array.
                    heights.append(errfunc(predicted[5, :], reference))
                    # Spread of the per-row errors across the predicted rows.
                    spreads.append(
                        np.std(errfunc(predicted, reference, axis=1)))

                rects.append(
                    ax.bar(ind + i * width,
                           heights,
                           width,
                           color=c,
                           bottom=0.0))
                ax.errorbar(ind + i * width + width / 2.0,
                            heights,
                            spreads,
                            fmt='.',
                            capsize=8,
                            elinewidth=3,
                            color='red',
                            ecolor='red',
                            markeredgewidth=2)
                ax.set_ylim(p[4])

            # add some text for labels, title and axes ticks
            ax.set_ylabel(p[1], fontsize=fontsize)
            ax.set_title(p[0], fontsize=fontsize)
            ax.set_xticks(ind + ((len(keys) + 3) * width) / len(props))
            ax.set_xticklabels(list(dsets))
            if j == 0:
                ax.legend(rects,
                          keys,
                          fontsize=fontsize,
                          bbox_to_anchor=bbox_to_anchor,
                          ncol=ncol)

        plt.show()
Example #18
0
    def plot_error_by_net(self,
                          props,
                          dsets,
                          ntwks=None,
                          fontsize=14,
                          bbox_to_anchor=(1.0, 1.1),
                          figsize=(15.0, 12.0),
                          ncol=1,
                          errortype='MAE',
                          storepath=''):
        """Plot the error of each property across networks, one panel per set.

        Parameters
        ----------
        props
            Sequence of property descriptors ``tk``: ``tk[0]`` is the legend
            label, ``tk[2]`` and ``tk[3]`` the keys of the predicted and
            reference arrays in ``self.fdata[net][dset]``.
        dsets
            Data-set keys; each gets one panel of a fixed 2 x 3 grid.
        ntwks
            Network keys to plot.  ``None`` or an empty sequence selects all
            keys of ``self.fdata`` in sorted order.  The last key is drawn as
            a horizontal dashed line instead of a point on the curve.
        fontsize
            Base font size.
        bbox_to_anchor, ncol
            Passed to each panel's legend.
        figsize
            Figure size passed to ``plt.subplots``.
        errortype
            ``'MAE'`` or ``'RMSE'``; also used as the y-axis label.
        storepath
            When non-empty the figure is saved to this PDF path; otherwise
            it is shown with ``plt.show()``.

        The method sets global tick label sizes through ``mpl.rcParams`` and
        returns nothing.
        """
        # Compare by value: "is" on a string literal tests object identity
        # and is not guaranteed to be true for equal strings.
        if errortype == 'MAE':
            errfunc = hdt.calculatemeanabserror
        elif errortype == 'RMSE':
            errfunc = hdt.calculaterootmeansqrerror
        else:
            # As before, an unknown error type produces no data points.
            errfunc = None

        mpl.rcParams['xtick.labelsize'] = fontsize
        mpl.rcParams['ytick.labelsize'] = fontsize

        colors = cm.viridis(np.linspace(0, 1, len(props)))

        fig, axes = plt.subplots(2, 3, figsize=figsize)

        if ntwks is None or len(ntwks) == 0:
            keys = sorted(self.fdata.keys())
        else:
            keys = ntwks

        for ds, ax in zip(dsets, axes.flatten()):
            for tk, c in zip(props, colors):
                heights = []
                spreads = []

                for k in keys:
                    if errfunc is None:
                        continue
                    predicted = self.fdata[k][ds][tk[2]]
                    reference = self.fdata[k][ds][tk[3]]
                    # The last row of the predicted array gives the plotted
                    # error; the spread is taken over the per-row errors.
                    Nn = predicted.shape[0] - 1
                    heights.append(errfunc(predicted[Nn, :], reference))
                    spreads.append(
                        np.std(errfunc(predicted, reference, axis=1)))

                # Every network but the last forms the curve.
                curve = heights[:-1]
                x_axis = np.arange(len(curve))
                ax.plot(x_axis,
                        curve,
                        '-o',
                        color=c,
                        linewidth=5,
                        label=tk[0])
                ax.errorbar(x_axis,
                            curve,
                            yerr=spreads[:-1],
                            fmt='.',
                            capsize=8,
                            elinewidth=3,
                            color=c,
                            ecolor=c,
                            markeredgewidth=2)

                # The last network is shown as a horizontal dashed line.
                ax.plot([-0.1, len(curve) - 1 + 0.1],
                        [heights[-1], heights[-1]],
                        '--',
                        color=c,
                        linewidth=5)

                ax.legend(fontsize=fontsize,
                          bbox_to_anchor=bbox_to_anchor,
                          ncol=ncol)

                ax.set_title(ds, fontsize=fontsize + 2)
                ax.set_xticks(x_axis)
                ax.set_xticklabels(list(keys[:-1]))
                ax.set_ylabel(errortype, fontsize=fontsize)
                ax.set_xlabel('Active Learning Version', fontsize=fontsize)

        if storepath:
            # The context manager closes the PDF even if savefig raises.
            with PdfPages(storepath) as pp:
                pp.savefig(fig)
        else:
            plt.show()
Example #19
0
def plot_irc(axes, i, d, f):
    """Plot one IRC file against two network ensembles and record errors.

    Parameters
    ----------
    axes
        Matplotlib axes to draw on.
    i
        Index of the file; printed and used in the legend.
    d
        Directory prefix of the IRC file.
    f
        IRC file name; it must contain ``'IRC'``.

    Side effects: writes an xyz file under the module-level prefix ``c`` and,
    when the trimmed ``Rc`` has more than 10 elements, appends to the
    module-level lists ``bar1``, ``bar2``, ``rmp1``, ``rmp2``, ``Ec1``,
    ``Ec2`` and ``Ea``.  The ensembles are the module-level ``nc1`` and
    ``nc2``.
    """
    Eact, xyz, spc, Rc = pyg.read_irc(d + f)
    Eact = hdt.hatokcal * Eact

    # Drop the first geometry/energy and the last row of Rc.
    xyz = xyz[1:]
    Eact = Eact[1:]
    Rc = Rc[:-1]

    # Unused below, but evaluating it raises for names without 'IRC'.
    s_idx = f.split('IRC')[1].split('.')[0]
    hdt.writexyzfile(c + f.split('.')[0] + '.xyz', xyz, spc)

    if Rc.size <= 10:
        return

    def ensemble_energies(models):
        """Return the scaled energies of every model for the geometries."""
        collected = []
        for model in models:
            model.setConformers(confs=xyz, types=list(spc))
            collected.append(hdt.hatokcal * model.energy())
        return collected

    #------------ CV NETWORKS 1 -----------
    energies1 = ensemble_energies(nc1)
    energies2 = ensemble_energies(nc2)

    # Per-point spread (reversed, to match the reversed plots) and mean of
    # each ensemble.
    modl_std1 = np.std(energies1, axis=0)[::-1]
    energies1 = np.mean(np.vstack(energies1), axis=0)

    modl_std2 = np.std(energies2, axis=0)[::-1]
    energies2 = np.mean(np.vstack(energies2), axis=0)

    rmse1 = hdt.calculaterootmeansqrerror(energies1, Eact)
    rmse2 = hdt.calculaterootmeansqrerror(energies2, Eact)

    # Maximum relative to the first point.
    dba = Eact.max() - Eact[0]
    db1 = energies1.max() - energies1[0]
    db2 = energies2.max() - energies2[0]

    # First point relative to the last point.
    rpa = Eact[0] - Eact[-1]
    rp1 = energies1[0] - energies1[-1]
    rp2 = energies2[0] - energies2[-1]

    bar_err1 = abs(db1 - dba)
    bar_err2 = abs(db2 - dba)
    rp_err1 = abs(rpa - rp1)
    rp_err2 = abs(rpa - rp2)

    bar1.append(bar_err1)
    bar2.append(bar_err2)

    rmp1.append(rp_err1)
    rmp2.append(rp_err2)

    Ec1.append(energies1)
    Ec2.append(energies2)
    Ea.append(Eact)

    print(i, ')', f, ':', len(spc), ':', rmse1, rmse2, 'R/P1: ',
          rp_err1, 'R/P2: ', rp_err2, 'Barrier1:',
          bar_err1, 'Barrier2:', bar_err2)

    # Curves relative to their first plotted point: column 0 of Rc scaled by
    # hdt.hatokcal, and the reversed ensemble means.
    Rce = hdt.hatokcal * (Rc[:, 0] - Rc[:, 0][0])
    Rce1 = energies2[::-1] - energies2[::-1][0]
    Rce0 = energies1[::-1] - energies1[::-1][0]

    # NOTE(review): Rc.min()/Rc.max() span all columns of Rc, not only the
    # plotted column 1 -- confirm this is intended.
    axes.set_xlim([Rc.min(), Rc.max()])
    axes.set_ylim([Rce.min() - 1.0, Rce1.max() + 20.0])

    axes.plot(Rc[:, 1], Rce, color='Black', label='DFT')

    axes.errorbar(Rc[:, 1],
                  Rce1,
                  yerr=modl_std2,
                  fmt='--',
                  color='red',
                  label="ANI-1: " + "{:.1f}".format(bar2[-1]),
                  linewidth=2)
    axes.errorbar(Rc[:, 1],
                  Rce0,
                  yerr=modl_std1,
                  fmt='--',
                  color='blue',
                  label="[" + str(i) + "]: " + "{:.1f}".format(bar1[-1]),
                  linewidth=2)

    axes.legend(loc="upper left", fontsize=10)
    axes.set_title(str(f),
                   color='black',
                   fontdict={'weight': 'bold'},
                   x=0.8,
                   y=0.85)
Example #20
0
def plot_corr_dist_axes(ax,
                        Xp,
                        Xa,
                        cmap,
                        labelx,
                        labely,
                        plabel,
                        vmin=0,
                        vmax=0):
    """Draw a log-scaled 2D correlation histogram of ``Xp`` vs ``Xa`` on ``ax``.

    The main panel shows a 200x200 ``hist2d`` of ``Xp`` (x axis) against
    ``Xa`` (y axis) over the square region spanned by ``Xa.min()`` and
    ``Xa.max()``, a dashed red identity line, a text label, and a box with the
    MAE / RMSE returned by ``hdt.calculatemeanabserror`` and
    ``hdt.calculaterootmeansqrerror`` for ``(Xa, Xp)``.  A 2x zoomed inset
    centred on ``(Xp.mean(), Xa.mean())`` is added in the upper-left corner.

    Args:
        ax: Matplotlib axes to draw on.
        Xp: Values plotted along x (array with ``.mean()``).
        Xa: Values plotted along y; also defines the plotted range.
        cmap: Colormap passed to ``hist2d``.
        labelx: x-axis label.
        labely: y-axis label.
        plabel: Text annotation placed near the lower-left of the panel.
        vmin: Lower colour limit; ignored when both ``vmin`` and ``vmax``
            are 0.
        vmax: Upper colour limit; ignored when both ``vmin`` and ``vmax``
            are 0.

    Returns:
        The value returned by ``ax.hist2d`` for the main panel.
    """
    Fmx = Xa.max()
    Fmn = Xa.min()
    span = Fmx - Fmn

    # Plot ground truth line
    ax.plot([Fmn, Fmx], [Fmn, Fmx], '--', c='red', linewidth=3)

    # Set labels
    ax.set_xlabel(labelx, fontsize=26)
    ax.set_ylabel(labely, fontsize=26)

    # Plot 2d Histogram.  The colour limits are handed to the norm itself:
    # matplotlib rejects vmin/vmax passed alongside a Normalize instance.
    if vmin == 0 and vmax == 0:
        norm = LogNorm()
    else:
        norm = LogNorm(vmin=vmin, vmax=vmax)
    bins = ax.hist2d(Xp,
                     Xa,
                     bins=200,
                     norm=norm,
                     range=[[Fmn, Fmx], [Fmn, Fmx]],
                     cmap=cmap)

    # Annotate with label
    ax.text(0.25 * span + Fmn, 0.06 * span + Fmn, plabel, fontsize=26)

    # Annotate with errors
    PMAE = hdt.calculatemeanabserror(Xa, Xp)
    PRMS = hdt.calculaterootmeansqrerror(Xa, Xp)
    ax.text(0.6 * span + Fmn,
            0.2 * span + Fmn,
            'MAE=' + "{:.3f}".format(PMAE) + '\nRMSE=' + "{:.3f}".format(PRMS),
            fontsize=30,
            bbox={
                'facecolor': 'white',
                'alpha': 0.5,
                'pad': 5
            })

    axins = zoomed_inset_axes(ax, 2., loc=2)  # zoom factor 2, upper left

    # Inset window: +/- 10% of the full span around the data means.  x shows
    # Xp and y shows Xa, so each axis is centred on its own variable's mean.
    sz = 0.1 * span
    x1, x2 = Xp.mean() - sz, Xp.mean() + sz
    y1, y2 = Xa.mean() - sz, Xa.mean() + sz

    axins.hist2d(Xp,
                 Xa,
                 bins=50,
                 range=[[x1, x2], [y1, y2]],
                 norm=LogNorm(),
                 cmap=cmap)
    # Identity line across the inset's x window.
    axins.plot([x1, x2], [x1, x2], '--', c='r', linewidth=3)

    # sub region of the original image
    axins.set_xlim(x1, x2)
    axins.set_ylim(y1, y2)
    axins.yaxis.tick_right()

    plt.xticks(visible=True)
    plt.yticks(visible=True)

    # NOTE(review): ec="1.5" lies outside the 0-1 range matplotlib documents
    # for grayscale strings -- confirm the intended edge colour.
    mark_inset(ax, axins, loc1=1, loc2=3, fc="none", ec="1.5")
    return bins
Example #21
0
    Fdft = hdn.hatokcal * Fdft  #.reshape(-1)

    idx = np.asarray(np.where(sigma < 0.08))[0]
    #print(idx,Fani[0].shape,Fdft.shape)
    Ferr.append((Fani[0][idx] - Fdft[idx]).flatten())

    # Calculate full dE
    dEani = hdn.calculateKdmat(Ncv, Eani)
    dEdft = hdn.calculatedmat(Edft)

    # Calculate per molecule errors
    FMAE = hdn.calculatemeanabserror(Fani.reshape(Ncv, -1),
                                     Fdft.reshape(-1),
                                     axis=1)
    FRMSE = hdn.calculaterootmeansqrerror(Fani.reshape(Ncv, -1),
                                          Fdft.reshape(-1),
                                          axis=1)

    #plt.hist((Fani-Fdft).flatten(),bins=100)
    # plt.show()
    '''
    if Emax[0] < np.abs((Eani-Edft)).max():
        ind = np.argmax(np.abs((Eani-Edft)).flatten())
        Emax[0] = (Eani-Edft).flatten()[ind]
        Emax[1] = Eani.flatten()[ind]
        Emax[2] = Edft.flatten()[ind]

    if Fmax[0] < np.abs((Fani-Fdft)).max():
        ind = np.argmax(np.abs((Fani-Fdft)).flatten())
        Fmax[0] = (Fani-Fdft).flatten()[ind]
        Fmax[1] = Fani.flatten()[ind]
Example #22
0
    def generate_fullset_errors(self, ntkey, tslist):
        """Compute combined error statistics over several test sets.

        For each of the three quantity pairs (``Eani``/``Edft``,
        ``dEani``/``dEdft``, ``Fani``/``Fdft``) and each of the two metrics
        (``hdt.calculatemeanabserror``, ``hdt.calculaterootmeansqrerror``),
        two values are stored:

        * the metric evaluated on row ``Nn`` of the predictions (the last row
          of ``'Eani'``; presumably the ensemble-mean prediction -- confirm)
          concatenated over all test sets, and
        * the standard deviation of the metric evaluated per row
          (``axis=1``) over rows ``0..Nn-1``.

        Args:
            ntkey: Key selecting the network entry in ``self.fdata``.
            tslist: Test-set keys to combine; when empty/falsy, every key
                under ``self.fdata[ntkey]`` is used.

        Returns:
            dict keyed by ``names[0]`` .. ``names[11]`` (module-level
            ``names``), in the order E-MAE, E-MAE-std, E-RMSE, E-RMSE-std,
            then the same four for dE and for F.
        """
        tskeys = tslist if tslist else self.fdata[ntkey].keys()
        testsets = [self.fdata[ntkey][tskey] for tskey in tskeys]

        # Index of the last row of the prediction arrays.
        Nn = testsets[0]['Eani'].shape[0] - 1

        metrics = (hdt.calculatemeanabserror, hdt.calculaterootmeansqrerror)
        quantities = (('Eani', 'Edft'), ('dEani', 'dEdft'), ('Fani', 'Fdft'))

        errors = {}
        slot = 0
        for pred_key, ref_key in quantities:
            for metric in metrics:
                # Metric on the last prediction row across all sets.
                errors[names[slot]] = metric(
                    np.concatenate([ts[pred_key][Nn, :] for ts in testsets]),
                    np.concatenate([ts[ref_key] for ts in testsets]))
                # Spread of the metric over the remaining rows.
                errors[names[slot + 1]] = np.std(
                    metric(np.hstack([ts[pred_key][0:Nn, :]
                                      for ts in testsets]),
                           np.hstack([ts[ref_key] for ts in testsets]),
                           axis=1))
                slot += 2
        return errors
# Keep the per-entry list of dx arrays for the histograms below, then
# flatten EA and dx for the combined scatter plot.
EA = np.concatenate(EA)
dxl = dx
dx = np.concatenate(dx)

# Histogram every third entry of the un-flattened dx list.
for i, x in enumerate(dxl):
    if i % 3 == 0:
        plt.hist(x, bins=50, histtype=u'step')
        #plt.plot(np.array(range(0,x.shape[0])),x)

#plt.ylabel('Rc $(\AA)$')
#plt.xlabel('Step')
plt.ylabel('count')
# Raw string: '\A' is not a valid escape sequence in a normal string literal.
plt.xlabel(r'$(\AA)$')
plt.show()
# Plot
# RMSE between E1 and EA is shown as the legend label of the red scatter.
errn = hdt.calculaterootmeansqrerror(E1, EA)
plt.scatter(dx, E1, color='red', label="{:.2f}".format(errn), linewidth=1)
plt.scatter(dx, EA, color='black', linewidth=1)
# Blue curve: norm of (m[atm] - xr) for each entry of datairc[0] against
# datairc[2] scaled by hdt.hatokcal.
plt.plot(np.array([np.linalg.norm(m[atm] - xr) for m in datairc[0]]),
         hdt.hatokcal * datairc[2],
         marker='o',
         color='blue',
         linewidth=3)

plt.suptitle("Double bond migration IRCs")

#plt.ylabel('E (kcal/mol)')
#plt.xlabel('Distance $\AA$')
plt.legend(bbox_to_anchor=(0.05, 0.95), loc=2, borderaxespad=0., fontsize=16)

plt.show()
Example #24
0
    def generate_fullset_mean_errors(self, ntkey):
        """Compute RMSE summaries over all test sets of one network entry.

        For each quantity pair (``Eani``/``Edft``, ``dEani``/``dEdft``,
        ``Fani``/``Fdft``) two values are produced with
        ``hdt.calculaterootmeansqrerror``:

        * suffix ``'E'``: RMSE of row ``Nn`` of the predictions (the last row
          of ``'Eani'``) concatenated over every test set;
        * suffix ``'M'``: mean of the per-row (``axis=1``) RMSE over rows
          ``0..Nn-1``.

        Args:
            ntkey: Key selecting the network entry in ``self.fdata``.

        Returns:
            dict keyed by ``names[2]``, ``names[6]`` and ``names[10]``
            (module-level ``names``), each with ``'E'`` and ``'M'`` appended.
        """
        testsets = [
            self.fdata[ntkey][tskey] for tskey in self.fdata[ntkey].keys()
        ]

        # Index of the last row of the prediction arrays.
        Nn = testsets[0]['Eani'].shape[0] - 1

        rmse = hdt.calculaterootmeansqrerror
        layout = ((2, 'Eani', 'Edft'), (6, 'dEani', 'dEdft'),
                  (10, 'Fani', 'Fdft'))

        summary = {}
        for slot, pred_key, ref_key in layout:
            summary[names[slot] + 'E'] = rmse(
                np.concatenate([ts[pred_key][Nn, :] for ts in testsets]),
                np.concatenate([ts[ref_key] for ts in testsets]))
            summary[names[slot] + 'M'] = np.mean(
                rmse(np.hstack([ts[pred_key][0:Nn, :] for ts in testsets]),
                     np.hstack([ts[ref_key] for ts in testsets]),
                     axis=1))
        return summary
            deltas = gt.hatokcal * np.abs(Ecmp_t -
                                          np.array(Eact_t, dtype=float))
            Me = max(deltas)
            if Me > Herror:
                Herror = Me
                Wfile = ''  #data['parent'] + '/' + data['child']

            Le = min(deltas)
            if Le < Lerror:
                Lerror = Le
                Bfile = ''  #data['parent'] + '/' + data['child']

            #print (gt.hatokcal * gt.calculaterootmeansqrerror(np.array(Eact_t, dtype=float),Ecmp_t))

            tNa = nc.getNumAtoms()
            err.append(gt.hatokcal * gt.calculaterootmeansqrerror(
                np.array(Eact_t, dtype=float), Ecmp_t) / float(tNa))
            sze.append(float(len(Eact_t)))

            time += _t2b

            Ecmp += Ecmp_t
            Eact += Eact_t
            #print('FILE: ', data['child'],' Energy: ', gt.hatokcal * np.array(Eact_t).min(),' Error: ', gt.hatokcal * gt.calculaterootmeansqrerror(np.array(Eact_t),np.array(Ecmp_t)))
            cnt = cnt + 1

# Elapsed time since _timeloop was recorded (set earlier in the script).
# NOTE(review): the message labels the value 'ms'; if tm is the stdlib time
# module the difference is in seconds -- confirm the unit.
_timeloop2 = (tm.time() - _timeloop)
print('Computation complete. Time: ' + "{:.4f}".format(_timeloop2) + 'ms')

# Release whatever resources adl holds (adl is created outside this view).
adl.cleanup()

#plt_by_index(np.array(Eerr),-1)
Example #26
0
 def generate_total_errors(self, ntkey, tskey):
     """Compute error statistics for a single test set.

     For each quantity pair (``Eani``/``Edft``, ``dEani``/``dEdft``,
     ``Fani``/``Fdft``) and each metric (``hdt.calculatemeanabserror``,
     ``hdt.calculaterootmeansqrerror``) two values are stored: the metric on
     row ``Nn`` of the predictions (the last row of ``'Eani'``), and the
     standard deviation of the per-row (``axis=1``) metric over rows
     ``0..Nn-1``.

     Args:
         ntkey: Key selecting the network entry in ``self.fdata``.
         tskey: Key selecting the test set under that entry.

     Returns:
         dict keyed by ``names[0]`` .. ``names[11]`` (module-level ``names``).
     """
     entry = self.fdata[ntkey][tskey]

     # Index of the last row of the prediction arrays.
     Nn = entry['Eani'].shape[0] - 1

     metrics = (hdt.calculatemeanabserror, hdt.calculaterootmeansqrerror)
     quantities = (('Eani', 'Edft'), ('dEani', 'dEdft'), ('Fani', 'Fdft'))

     totals = {}
     for q_index, (pred_key, ref_key) in enumerate(quantities):
         for m_index, metric in enumerate(metrics):
             # Four slots per quantity: value/std for MAE, then for RMSE.
             base = 4 * q_index + 2 * m_index
             totals[names[base]] = metric(entry[pred_key][Nn, :],
                                          entry[ref_key])
             totals[names[base + 1]] = np.std(
                 metric(entry[pred_key][0:Nn, :], entry[ref_key], axis=1))
     return totals
Example #27
0
# Dump the coordinates with the element types from data[2][0].
hdn.writexyzfile('/home/jujuman/crds.xyz', xyz, data[2][0])

# Set required files for pyNeuroChem
#wkdir    = '/home/jujuman/Dropbox/ChemSciencePaper.AER/ANI-c08e-ntwk/'
wkdir = '/home/jujuman/Research/GDB-11-wB97X-6-31gd/ANI-c08e-ntwk_newtrain/'
cnstfile = wkdir + 'rHCNO-4.6A_16-3.1A_a4-8.params'
saefile = wkdir + 'sae_6-31gd.dat'
nnfdir = wkdir + 'networks/'

# Construct pyNeuroChem class
mol = pync.conformers(cnstfile, saefile, nnfdir, 0)

mol.setConformers(confs=xyz, types=list(data[2][0]))

# Network energies scaled by hdn.hatokcal, compared against df_E below.
E = hdn.hatokcal * mol.energy()

rmse = hdn.calculaterootmeansqrerror(df_E, E)

# x axis is simply the sample index.
x = list(range(0, df_E.shape[0]))
#x = np.linalg.norm(xyz[:,3,:]-xyz[0,3,:],axis=1)

#print(x)

plt.scatter(x, df_E, label='DFT')
plt.scatter(x, E, label='ANI err: ' + str(rmse) + ' kcal/mol')
# Raw string: '\A' is not a valid escape sequence in a normal string literal.
plt.xlabel(r'Distance ($\AA$)')
plt.ylabel('Energy (kcal/mol)')
plt.legend(bbox_to_anchor=(0.4, 0.99), loc=2, borderaxespad=0., fontsize=14)

plt.show()
Example #28
0
    def determine_min_error_by_sigma(self,
                                     ntkey,
                                     minerror,
                                     percent,
                                     tskeys=('GDB07to09', ),
                                     figsize=(15.0, 12.0),
                                     labelx='',
                                     labely='',
                                     xyrange=(0.0, 10.0, 0.0, 10.0),
                                     storepath='',
                                     cmap=mpl.cm.viridis):
        """Find the spread threshold that captures ``percent`` of large errors.

        Two per-sample values are computed over the selected test sets, both
        divided by ``sqrt(Na)``:

        * ``Sani``: standard deviation of rows ``0..Nn-1`` of ``'Eani'``;
        * ``Eerr``: maximum absolute deviation of those rows from ``'Edft'``.

        Starting at ``Sani.max()``, a threshold ``S`` is lowered in steps of
        0.001 until the samples with ``Sani > S`` contain at least
        ``percent`` % of all samples with ``Eerr > minerror``.  The search
        also stops once ``S`` has dropped below ``Sani.min()``: every sample
        is selected at that point, so the captured fraction cannot grow.  The
        result is shown as a log-scaled 2D histogram of ``Sani`` vs ``Eerr``
        with the threshold line and a summary box.

        Args:
            ntkey: Key selecting the network entry in ``self.fdata``.
            minerror: Error level above which a sample counts as "large".
            percent: Target percentage of large-error samples to capture.
            tskeys: Test-set keys to combine.
            figsize: Figure size passed to ``plt.subplots``.
            labelx: x-axis label.
            labely: y-axis label.
            xyrange: ``(xmin, xmax, ymin, ymax)`` of the histogram.
            storepath: If non-empty, the figure is written to this PDF path;
                otherwise it is shown interactively.
            cmap: Colormap for the histogram.

        Returns:
            None.  Side effects: sets two ``mpl.rcParams`` tick sizes, prints
            two RMSE values, and shows or saves the figure.
        """
        mpl.rcParams['xtick.labelsize'] = 18
        mpl.rcParams['ytick.labelsize'] = 18

        testsets = [self.fdata[ntkey][tskey] for tskey in tskeys]

        # Index of the last row of 'Eani'; rows before it feed the spread and
        # max-error estimates, the last row feeds the RMSE summaries.
        Nn = testsets[0]['Eani'].shape[0] - 1

        Eani = np.hstack([ts['Eani'][0:Nn, :] for ts in testsets])
        Eanimu = np.hstack([ts['Eani'][Nn, :] for ts in testsets])
        Edft = np.concatenate([ts['Edft'] for ts in testsets])
        Sani = np.concatenate(
            [np.std(ts['Eani'][0:Nn, :], axis=0) for ts in testsets])
        Na = np.concatenate([ts['Na'] for ts in testsets])

        Sani = Sani / np.sqrt(Na)
        Eerr = np.max(np.abs(Eani - Edft), axis=0) / np.sqrt(Na)

        # Number of samples whose error exceeds the requested level.
        Nmax = np.where(Eerr > minerror)[0].size

        dS = Sani.max()
        Smin = Sani.min()
        step = 0
        while True:
            S = dS - step * 0.001
            Sidx = np.where(Sani > S)
            step += 1

            # The small constant avoids division by zero when Nmax == 0.
            perc = 100.0 * np.where(Eerr[Sidx] > minerror)[0].size / (
                Nmax + 1.0E-7)

            # Stop on success, or once S is below every sample (also true
            # for a NaN threshold): lowering S further cannot raise perc,
            # so continuing would loop forever.
            if perc >= percent or not S >= Smin:
                break

        So = np.where(Sani > S)
        Su = np.where(Sani <= S)

        rmse_over = hdt.calculaterootmeansqrerror(Eanimu[So], Edft[So])
        rmse_under = hdt.calculaterootmeansqrerror(Eanimu[Su], Edft[Su])

        print('RMSE Over:  ', rmse_over)
        print('RMSE Under: ', rmse_under)

        fig, ax = plt.subplots(figsize=figsize)

        # Fraction of large errors among the selected samples (0 when nothing
        # is selected) and among all samples.
        n_over = So[0].size
        poa = np.where(Eerr[So] > minerror)[0].size / n_over if n_over else 0.0
        pob = np.where(Eerr > minerror)[0].size / Eerr.size

        ax.text(
            0.57 * (xyrange[1]),
            0.04 * (xyrange[3]),
            'Total Captured:    ' +
            str(int(100.0 * Sidx[0].size / Edft.size)) + '%' + '\n' +
            r'($\mathrm{\mathcal{E}>}$' + "{:.1f}".format(minerror) +
            r'$\mathrm{) \forall \rho}$:           ' + str(int(100 * pob)) +
            '%' + '\n' + r'($\mathrm{\mathcal{E}>}$' +
            "{:.1f}".format(minerror) + r'$\mathrm{) \forall \rho >}$' +
            "{:.2f}".format(S) + ': ' + str(int(100 * poa)) + '%' + '\n' +
            r'$\mathrm{E}$ RMSE ($\mathrm{\rho>}$' + "{:.2f}".format(S) +
            r'$\mathrm{)}$: ' + "{:.1f}".format(rmse_over) + '\n' +
            r'$\mathrm{E}$ RMSE ($\mathrm{\rho\leq}$' + "{:.2f}".format(S) +
            r'$\mathrm{)}$: ' + "{:.1f}".format(rmse_under),
            bbox={
                'facecolor': 'grey',
                'alpha': 0.5,
                'pad': 10
            },
            fontsize=18)

        plt.axvline(x=S,
                    linestyle='--',
                    color='r',
                    linewidth=5,
                    label=r"$\mathrm{\rho=}$" + "{:.2f}".format(S) +
                    ' is the value that captures\n' + str(int(percent)) +
                    '% of errors over ' + r"$\mathrm{\mathcal{E}=}$" +
                    "{:.1f}".format(minerror))

        # Set labels
        ax.set_xlabel(labelx, fontsize=24)
        ax.set_ylabel(labely, fontsize=24)

        # Plot 2d Histogram
        bins = ax.hist2d(Sani,
                         Eerr,
                         bins=400,
                         norm=LogNorm(),
                         range=[[xyrange[0], xyrange[1]],
                                [xyrange[2], xyrange[3]]],
                         cmap=cmap)

        # Build color bar
        cb1 = fig.colorbar(bins[-1], cmap=cmap)
        cb1.set_label('Count', fontsize=20)
        cb1.ax.tick_params(labelsize=18)
        plt.legend(loc='upper center', fontsize=18)

        if storepath:
            # The context manager closes the PDF even if savefig raises.
            with PdfPages(storepath) as pp:
                pp.savefig(fig)
        else:
            plt.show()
    2.0,
    1.9,
    2.0,
])

# Fit the parameters of hdt.buckingham_pot to the (xt_data, yt_data) pairs,
# starting from p0 and constrained by bounds.
# (curve_fit is presumably scipy.optimize.curve_fit -- confirm the import.)
popt, pcov = curve_fit(hdt.buckingham_pot,
                       xt_data,
                       yt_data,
                       p0=p0,
                       bounds=bounds)  # NN

print(popt)
# Evaluate the model on xv_data with the initial and the fitted parameters.
iEc = hdt.buckingham_pot(xv_data, *p0)
fEc = hdt.buckingham_pot(xv_data, *popt)

# RMSE against yv_data before and after the fit.
irmse = hdt.calculaterootmeansqrerror(iEc, yv_data)
frmse = hdt.calculaterootmeansqrerror(fEc, yv_data)

# Persist the fitted parameters under the key 'param'.
np.savez('mp_ani_params_test.npz', param=popt)

print('Final RMSE:', hdt.hatokcal * frmse, ' Initial RMSE:',
      hdt.hatokcal * irmse)

# Correlation plot: the black line is the identity (yv_data against itself);
# red/blue points are the initial/fitted model values against yv_data.
plt.plot(yv_data, yv_data, color='black', label='Act')
plt.scatter(yv_data, iEc, color='red', label='Init')
plt.scatter(yv_data, fEc, color='blue', label='Fit')

plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.show()
def plot_irc_data(axes, file, rcf, title):
    """Plot reference and network energy profiles for one data file.

    Reads coordinates, types and reference energies from ``file`` via
    ``hdt.readncdat`` and the plot abscissa from column 1 of the ``'x'``
    array stored in ``rcf``.  Energies are shifted so the first point is
    zero, scaled by ``hdt.hatokcal`` and drawn on ``axes``:

    * red: the network loaded from the hard-coded ``rcdir``, labelled with
      its RMSE against the reference;
    * black dashed: the reference energies;
    * blue: each of the five networks loaded from the hard-coded ``wkdir``;
      the last one carries the mean of their RMSEs as its label.

    Args:
        axes: Matplotlib axes to draw on.
        file: Path passed to ``hdt.readncdat``.
        rcf: Path passed to ``np.load``; must provide key ``'x'``.
        title: Axes title.

    Returns:
        ``np.array([errn, mean_err])`` -- the RMSE of the ``rcdir`` network
        and the mean RMSE of the five ``wkdir`` networks.
    """
    xyz, typ, Eact = hdt.readncdat(file, np.float32)
    Rc = np.load(rcf)
    types = list(typ)
    rc_x = Rc['x'][:, 1]

    # Set required files for pyNeuroChem
    wkdir = '/home/jujuman/Dropbox/ChemSciencePaper.AER/networks/ANI-c08f-ntwk-cv/'
    cnstfile = 'rHCNO-4.6A_16-3.1A_a4-8.params'
    saefile = 'sae_6-31gd.dat'

    nc = [
        pync.conformers(wkdir + cnstfile, wkdir + saefile,
                        wkdir + 'cv_c08e_ntw_' + str(l) + '/networks/', 0)
        for l in range(5)
    ]

    rcdir = '/home/jujuman/Research/ANI-DATASET/RXN1_TNET/training/rxn1to6/ani_benz_rxn_ntwk/'
    ncr1 = pync.conformers(rcdir + '../../' + cnstfile,
                           rcdir + '../../' + saefile, rcdir + '/networks/', 0)
    ncr2 = pync.molecule(rcdir + '../../' + cnstfile,
                         rcdir + '../../' + saefile, rcdir + '/networks/', 0)
    ncr3 = pync.molecule(rcdir + '../../' + cnstfile,
                         rcdir + '../../' + saefile, rcdir + '/networks/', 0)

    # Compute reactant E (first geometry)
    ncr2.setMolecule(coords=xyz[0], types=types)
    Er = ncr2.energy()

    # Compute product E (last geometry)
    ncr3.setMolecule(coords=xyz[-1], types=types)
    Ep = ncr3.energy()

    dE_ani = hdt.hatokcal * (Er - Ep)
    dE_dft = hdt.hatokcal * (Eact[0] - Eact[-1])
    # The second value comes from the reference energies read from `file`,
    # so it is labelled DFT rather than ANI.
    print('Delta E R/P ANI:', dE_ani, 'Delta E R/P DFT:', dE_dft, 'Diff:',
          abs(dE_ani - dE_dft))

    # Set the conformers in NeuroChem
    ncr1.setConformers(confs=xyz, types=types)

    # Compute Energies of Conformations
    E1 = ncr1.energy()

    # Shift both profiles so the first point is the zero of energy
    E1 = E1 - E1[0]
    Eact = Eact - Eact[0]

    # Plot
    errn = hdt.calculaterootmeansqrerror(hdt.hatokcal * E1,
                                         hdt.hatokcal * Eact)
    axes.plot(rc_x,
              hdt.hatokcal * (E1),
              color='red',
              label="{:.2f}".format(errn),
              linewidth=2)

    # Dashed black reference curve; the colour is given only via the keyword
    # so it no longer conflicts with a colour in the format string.
    axes.plot(rc_x,
              hdt.hatokcal * (Eact),
              '--',
              color='black',
              linewidth=3)

    err = []
    last = len(nc) - 1

    for n, net in enumerate(nc):
        # Set the conformers in NeuroChem
        net.setConformers(confs=xyz, types=types)

        # Compute Energies of Conformations
        E1 = net.energy()
        E1 = E1 - E1[0]

        err.append(
            hdt.calculaterootmeansqrerror(hdt.hatokcal * E1,
                                          hdt.hatokcal * Eact))

        # Each curve is drawn once; only the final one gets a legend entry,
        # which reports the mean RMSE over all networks.
        line_kwargs = {'color': 'blue', 'linewidth': 1}
        if n == last:
            line_kwargs['label'] = "{:.2f}".format(np.mean(np.asarray(err)))
        axes.plot(rc_x, hdt.hatokcal * (E1), **line_kwargs)

    axes.set_xlim([rc_x.min(), rc_x.max()])
    axes.legend(loc="upper right", fontsize=8)
    axes.set_title(title)
    return np.array([errn, np.mean(err)])