def pyNCcomputeTestSet(cnstfile1, saefile1, nnfdir1, dtdir, dtdftpref, dtpm6dir, dtpm6pref, N, P=1.0):
    """Run a NeuroChem network over numbered ``*_test.dat`` files.

    For every index ``i`` in ``range(N)`` the file
    ``dtdir + dtdftpref + str(i) + '_test.dat'`` is considered.  It is used
    only if it exists and a Bernoulli draw with probability ``P`` succeeds.
    The matching file under ``dtpm6dir``/``dtpm6pref`` is read as well, and
    the pair is kept only when both energy lists have the same length.

    Every energy list is passed through ``shiftlsttomin`` (defined elsewhere;
    presumably shifts the list relative to its minimum -- confirm) before
    being accumulated.

    Returns:
        ``(Eact, Ecmp, Eotr, Ndat, Nmol, t)``: the three accumulated energy
        lists as float arrays, the number of accumulated data points, the
        number of files used, and the summed ``computeEnergies`` wall time
        (seconds from ``tm.time()`` multiplied by 1000).
    """
    network = pync.pyNeuroChem(cnstfile1, saefile1, nnfdir1, 0)

    ref_energies = []
    net_energies = []
    other_energies = []
    point_count = 0
    file_count = 0
    elapsed = 0.0

    for idx in range(N):
        ref_path = dtdir + dtdftpref + str(idx) + '_test.dat'

        print('|----- File ' + str(idx) + ' -----|')
        print('Name: ' + ref_path)

        # The draw happens on every iteration, whether or not the file exists.
        selected = bool(np.random.binomial(1, P))
        if not (os.path.isfile(ref_path) and selected):
            continue

        coords, species, ref_batch = gt.readncdat(ref_path)
        _coords, _species, other_batch = gt.readncdat(
            dtpm6dir + dtpm6pref + str(idx) + '_test.dat')

        # Files whose two energy lists disagree in length are dropped silently.
        if len(ref_batch) != len(other_batch):
            continue

        ref_energies += shiftlsttomin(ref_batch)
        other_energies += shiftlsttomin(other_batch)

        file_count += 1
        point_count += len(ref_batch)

        # Hand the conformers to NeuroChem and time the energy evaluation.
        network.setConformers(confs=coords, types=species)

        print('Computing energies 1...')
        started = tm.time()
        net_batch = network.computeEnergies()
        batch_ms = (tm.time() - started) * 1000.0
        elapsed += batch_ms
        print('Computation complete 1. Time: ' + "{:.4f}".format(batch_ms) + 'ms')

        net_energies += shiftlsttomin(net_batch)

    return (
        np.array(ref_energies, dtype=float),
        np.array(net_energies, dtype=float),
        np.array(other_energies, dtype=float),
        point_count,
        file_count,
        elapsed,
    )
def pyNCcomputeTestSet(cnstfile1,saefile1,nnfdir1,dtdir,dtdftpref,dtpm6dir,dtpm6pref,N,P=1.0):
    """Run a NeuroChem network over numbered ``*_test.dat`` files.

    For every index ``i`` in ``range(N)`` the file
    ``dtdir + dtdftpref + str(i) + '_test.dat'`` is considered.  A missing
    file is reported and skipped.  An existing file is used only when a
    Bernoulli draw with probability ``P`` succeeds.  The matching file under
    ``dtpm6dir``/``dtpm6pref`` is read as well, and the pair is kept only
    when both energy lists have the same length (a mismatch is reported).

    Every energy list is passed through ``shiftlsttomin`` (defined elsewhere;
    presumably shifts the list relative to its minimum -- confirm) before
    being accumulated.

    Args:
        cnstfile1, saefile1, nnfdir1: forwarded to ``pync.pyNeuroChem``.
        dtdir, dtdftpref: directory and filename prefix of the reference files.
        dtpm6dir, dtpm6pref: directory and filename prefix of the comparison
            files.
        N: number of file indices to try (``0 .. N-1``).
        P: probability of using a file that exists.

    Returns:
        ``(Eact, Ecmp, Eotr, Ndat, Nmol, t)``: the three accumulated energy
        lists as float arrays, the number of accumulated data points, the
        number of files used, and the summed ``computeEnergies`` wall time
        (seconds from ``tm.time()`` multiplied by 1000).
    """
    # Construct pyNeuroChem classes
    nc = pync.pyNeuroChem(cnstfile1, saefile1, nnfdir1, 0)

    Eact = []
    Ecmp = []
    Eotr = []

    Ndat = 0
    Nmol = 0
    t = 0.0

    for i in range(0, N):
        dftfile = dtdir + dtdftpref + str(i) + '_test.dat'
        pm6file = dtpm6dir + dtpm6pref + str(i) + '_test.dat'

        # Draw on every iteration so the random stream does not depend on
        # which files happen to exist.
        rv = bool(np.random.binomial(1, P))

        if not os.path.isfile(dftfile):
            print('File not found: ' + dftfile)
            continue

        if not rv:
            # The file exists but was excluded by the random draw; that is
            # not an error, so nothing is reported.
            continue

        # NOTE(review): the existence of pm6file is not checked; what
        # gt.readncdat does for a missing file is not visible here.
        xyz, typ, Eact_t, _ = gt.readncdat(dftfile)
        _, _, Eotr_t, _ = gt.readncdat(pm6file)

        if len(Eact_t) != len(Eotr_t):
            print(str(len(Eact_t)) + '!=' + str(len(Eotr_t)) + ' File: ' + dftfile)
            continue

        Eact += shiftlsttomin(Eact_t)
        Eotr += shiftlsttomin(Eotr_t)

        Nmol += 1
        Ndat += len(Eact_t)

        # Set the conformers in NeuroChem
        nc.setConformers(confs=xyz, types=typ)

        # Compute energies of the conformations and time the call
        print(' ' + str(Nmol) + ') Computing ' + str(len(Eact_t)) + ' energies...')
        _t1b = tm.time()
        Ecmp_t = nc.computeEnergies()
        _t2b = (tm.time() - _t1b) * 1000.0
        t += _t2b

        Ecmp += shiftlsttomin(Ecmp_t)

    Eact = np.array(Eact, dtype=float)
    Eotr = np.array(Eotr, dtype=float)
    Ecmp = np.array(Ecmp, dtype=float)

    return Eact, Ecmp, Eotr, Ndat, Nmol, t
def read_data_files_convert_cm(file, N):
    """Read one data file and build its Coulomb-matrix data.

    Args:
        file: path handed to ``gt.readncdat`` (read with ``np.float32``).
        N: size argument forwarded to ``cm.GenCMatData2``.

    Returns:
        ``(cmat, Eact_tr, ise)``: the result of ``cm.GenCMatData2``, the
        energies with ``ise`` subtracted, and ``ise`` itself (the value of
        ``cm.computerISE`` for the file's atom types).
    """
    # Get training molecules.
    # NOTE(review): the fourth return value (a read flag in other callers)
    # is not checked here.
    xyz_tr, typ_tr, Eact_tr, _readf = gt.readncdat(file, np.float32)

    # Compute energy of atoms at infinite separation and remove it
    ise = cm.computerISE(typ_tr)
    Eact_tr = Eact_tr - ise

    cmat = cm.GenCMatData2(xyz_tr, typ_tr, N)

    return cmat, Eact_tr, ise
plt.rc('font', **font)

# ---- pyNeuroChem input files -------------------------------------------
wkdir = '/home/jujuman/Research/GDB-11-wB97X-6-31gd/train_08_5/'

cnstfile = wkdir + 'rHCNO-4.7A_32-3.2A_a8-8.params'
saefile = wkdir + 'sae_6-31gd.dat'
nnfdir = wkdir + 'networks/'

# ---- Network object ----------------------------------------------------
nc = pync.pyNeuroChem(cnstfile, saefile, nnfdir, 0)

# ---- Reference data: DFT first, then DFTB --------------------------------
xyz, typ, Eact = gt.readncdat(
    '/home/jujuman/Dropbox/ChemSciencePaper.AER/TestCases/Retinol/data/retinolconformer_DFT.dat'
)
xyz1, typ1, Eact1 = gt.readncdat(
    '/home/jujuman/Dropbox/ChemSciencePaper.AER/TestCases/Retinol/data/retinolconformer_DFTB.dat'
)

Eact = np.array(Eact)
Eact1 = np.array(Eact1)

# ---- Load the DFT conformers into the network and report what it holds --
nc.setConformers(confs=xyz, types=typ)

print('Number of Atoms Loaded: ' + str(nc.getNumAtoms()))
print('Number of Confs Loaded: ' + str(nc.getNumConfs()))
def produce_scan(ax,title,xlabel,cnstfile,saefile,nnfdir,dtdir,dt1,dt2,dt3,smin,smax,iscale,ishift):
    """Plot a scan: reference energies against the network and two other methods.

    Three data files are read from ``dtdir``.  ``dt1`` supplies the
    geometries and the reference energies (labelled 'DFT' in the plot);
    ``dt2`` and ``dt3`` supply the energies labelled 'DFTB' and 'PM6'.
    The network energies (labelled 'ANI-1') are computed for the ``dt1``
    geometries.  All energies are multiplied by ``gt.hatokcal``, cut to
    ``[smin:smax]`` and shifted so that each curve's minimum is zero.

    Args:
        ax: axes-like object; ``plot``, ``set_title``, ``set_xlim``,
            ``set_ylim``, ``set_xlabel``, ``set_ylabel`` and ``legend`` are
            called on it.
        title: plot title.
        xlabel: x-axis label.
        cnstfile, saefile, nnfdir: forwarded to ``pync.pyNeuroChem``.
        dtdir: directory prefix of the data files.
        dt1, dt2, dt3: data file names, appended to ``dtdir``.
        smin, smax: slice bounds applied to every curve.
        iscale, ishift: the x value of point ``k`` is ``k * iscale + ishift``.

    Returns:
        None.  The function prints progress, timing and Spearman
        correlations, and draws on ``ax``.
    """
    xyz, typ, Eact, _ = gt.readncdat(dtdir + dt1, np.float32)
    _, _, Eact2, _ = gt.readncdat(dtdir + dt2)
    _, _, Eact3, _ = gt.readncdat(dtdir + dt3)

    # Construct pyNeuroChem classes
    nc1 = pync.pyNeuroChem(cnstfile, saefile, nnfdir, 1)

    # Set the conformers in NeuroChem
    nc1.setConformers(confs=xyz, types=typ)

    # Print some data from the NeuroChem
    print('1) Number of Atoms Loaded: ' + str(nc1.getNumAtoms()))
    print('1) Number of Confs Loaded: ' + str(nc1.getNumConfs()))

    # Compute energies of the conformations
    print('Computing energies 1...')
    _t1b = tm.time()
    Ecmp1 = nc1.energy()
    print('Computation complete 1. Time: ' + "{:.4f}".format((tm.time() - _t1b) * 1000.0) + 'ms')

    # Unit conversion
    Ecmp1 = gt.hatokcal * Ecmp1
    Eact = gt.hatokcal * Eact
    Eact2 = gt.hatokcal * Eact2
    Eact3 = gt.hatokcal * Eact3

    # x values are built from the full data length, then windowed like the curves
    IDX = np.arange(0, Eact.shape[0], 1, dtype=float) * iscale + ishift

    IDX = IDX[smin:smax]
    Eact = Eact[smin:smax]
    Eact2 = Eact2[smin:smax]
    Eact3 = Eact3[smin:smax]
    Ecmp1 = Ecmp1[smin:smax]

    # Shift every curve so its minimum inside the window is zero
    Ecmp1 = Ecmp1 - Ecmp1.min()
    Eact = Eact - Eact.min()
    Eact2 = Eact2 - Eact2.min()
    Eact3 = Eact3 - Eact3.min()

    rmse1 = gt.calculaterootmeansqrerror(Eact, Ecmp1)
    rmse3 = gt.calculaterootmeansqrerror(Eact, Eact2)
    rmse4 = gt.calculaterootmeansqrerror(Eact, Eact3)

    print("Spearman corr. 1: " + "{:.3f}".format(st.spearmanr(Ecmp1, Eact)[0]))
    print("Spearman corr. 2: " + "{:.3f}".format(st.spearmanr(Eact2, Eact)[0]))
    print("Spearman corr. 3: " + "{:.3f}".format(st.spearmanr(Eact3, Eact)[0]))

    # RMSE values in the legend are rounded to three significant digits
    ax.plot(IDX, Eact, '-', marker='o', color='black', label='DFT',
            linewidth=2, markersize=7)
    ax.plot(IDX, Ecmp1, ':', marker='D', color='red',
            label='ANI-1 RMSE: ' + '%s' % float('%.3g' % rmse1) + ' kcal/mol',
            linewidth=2, markersize=5)
    ax.plot(IDX, Eact2, ':', marker='v', color='blue',
            label='DFTB RMSE: ' + '%s' % float('%.3g' % rmse3) + ' kcal/mol',
            linewidth=2, markersize=5)
    ax.plot(IDX, Eact3, ':', marker='*', color='orange',
            label='PM6 RMSE: ' + '%s' % float('%.3g' % rmse4) + ' kcal/mol',
            linewidth=2, markersize=7)

    th = ax.set_title(title, fontsize=16)
    th.set_position([0.5, 1.005])

    # Set Limits
    ax.set_xlim([IDX.min(), IDX.max()])
    ax.set_ylim([Eact.min() - 1.0, Eact.max() + 1.0])

    # Raw string: '\D' is not a valid escape, the backslash must reach the label as-is
    ax.set_ylabel(r'$\Delta$E calculated (kcal/mol)')
    ax.set_xlabel(xlabel)
    ax.legend(bbox_to_anchor=(0.2, 0.98), loc=2, borderaxespad=0., fontsize=14)
import numpy as np
from scipy import stats as st

# ---- pyNeuroChem input files -------------------------------------------
wkdir = '/home/jujuman/Research/GDB-11-wB97X-6-31gd/train_07/'

cnstfile = wkdir + 'rHCNO-4.5A_32-3.1A_a8-8.params'
saefile = wkdir + 'sae_6-31gd.dat'
nnfdir = wkdir + 'networks/'

# ---- Network object ----------------------------------------------------
nc = pync.pyNeuroChem(cnstfile, saefile, nnfdir, 0)

# ---- Reference conformers and energies ---------------------------------
xyz, typ, Eact = gt.readncdat(
    '/home/jujuman/Dropbox/Research/ChemSciencePaper/TestCases/Atomoxetine/Atomoxetine_conformersC_test.dat'
)
Eact = np.array(Eact)

# ---- Load the conformers into the network and report what it holds -----
nc.setConformers(confs=xyz, types=typ)

print('Number of Atoms Loaded: ' + str(nc.getNumAtoms()))
print('Number of Confs Loaded: ' + str(nc.getNumConfs()))

# ---- Network energies for the loaded conformers ------------------------
print('Computing energies 1...')
Ecmp = np.array(nc.computeEnergies())
print('Computation complete 1.')
from scipy import stats as st

# ---- pyNeuroChem input files -------------------------------------------
wkdir = '/home/jujuman/Research/GDB-11-wB97X-6-31gd/train_07/'

cnstfile = wkdir + 'rHCNO-4.5A_32-3.1A_a8-8.params'
saefile = wkdir + 'sae_6-31gd.dat'
nnfdir = wkdir + 'networks/'

# ---- Network object ----------------------------------------------------
nc = pync.pyNeuroChem(cnstfile, saefile, nnfdir, 0)

# ---- Reference conformers and energies ---------------------------------
xyz, typ, Eact = gt.readncdat(
    '/home/jujuman/Dropbox/Research/ChemSciencePaper/TestCases/Atomoxetine/Atomoxetine_conformersC_test.dat'
)
Eact = np.array(Eact)

# ---- Load the conformers into the network and report what it holds -----
nc.setConformers(confs=xyz, types=typ)

print('Number of Atoms Loaded: ' + str(nc.getNumAtoms()))
print('Number of Confs Loaded: ' + str(nc.getNumConfs()))

# ---- Network energies for the loaded conformers ------------------------
print('Computing energies 1...')
Ecmp = np.array(nc.computeEnergies())
print('Computation complete 1.')
# ---- Accumulators used by the per-file loop ----------------------------
# NOTE(review): the roles of ld / sd are not visible in this excerpt.
Eerr = []
time = 0.0

ld = [0, 0.0]
sd = [0, 100000.0]

err = []
sze = []

N = 0

for i in files:
    xyz, typ, Eact_t, readf = gt.readncdat(dtdir + i)

    # Only files flagged as readable by gt.readncdat are processed.
    if readf:
        # Hand the conformers of this file to NeuroChem
        nc.setConformers(confs=xyz, types=typ)

        print('FILE: ' + dtdir + i)

        # Report what NeuroChem now holds
        print('1) Number of Atoms Loaded: ' + str(nc.getNumAtoms()))
        print('1) Number of Confs Loaded: ' + str(nc.getNumConfs()))

        # Start timing the energy evaluation
        print('Computing energies...')
        _t1b = tm.time()
# Size argument passed to cm.GenCMatData; column N*N of its result receives the energies
N = 30

print('Computing coulomb matrices...')
_t1b = tm.time()

cnt = 0
for i in files:
    cnt += 1
    print('FILE: ' + str(cnt) + ' of ' + str(len(files)) + ' ' + i)

    # Set file
    file = dtdir + i

    # Get training molecules
    xyz_tr, typ_tr, Eact_tr, readf = gt.readncdat(file, np.float32)

    # Compute energy of atoms at infinite separation and remove it
    ise = cm.computerISE(typ_tr)
    Eact_tr = (Eact_tr - ise)

    Nm = Eact_tr.shape[0]

    cmat = cm.GenCMatData(xyz_tr, typ_tr, N)
    cmat[:, N * N] = Eact_tr

    # One raw binary output file per input file; the context manager closes
    # it even if the write raises.
    with open(cmdfile + '_' + str(cnt), 'wb') as output_file:
        cmat.tofile(output_file)
plt.rc('font', **font)

# ---- pyNeuroChem input files -------------------------------------------
wkdir = '/home/jujuman/Research/GDB-11-wB97X-6-31gd/train_08_5/'

cnstfile = wkdir + 'rHCNO-4.7A_32-3.2A_a8-8.params'
saefile = wkdir + 'sae_6-31gd.dat'
nnfdir = wkdir + 'networks/'

# ---- Network object ----------------------------------------------------
nc = pync.pyNeuroChem(cnstfile, saefile, nnfdir, 0)

# ---- Reference data: DFT first, then DFTB --------------------------------
xyz, typ, Eact = gt.readncdat(
    '/home/jujuman/Dropbox/ChemSciencePaper.AER/TestCases/Retinol/data/retinolconformer_DFT.dat'
)
xyz1, typ1, Eact1 = gt.readncdat(
    '/home/jujuman/Dropbox/ChemSciencePaper.AER/TestCases/Retinol/data/retinolconformer_DFTB.dat'
)

Eact = np.array(Eact)
Eact1 = np.array(Eact1)

# ---- Load the DFT conformers into the network and report what it holds --
nc.setConformers(confs=xyz, types=typ)

print('Number of Atoms Loaded: ' + str(nc.getNumAtoms()))
print('Number of Confs Loaded: ' + str(nc.getNumConfs()))

# ---- Start timing the energy evaluation --------------------------------
print('Computing energies...')
_t1b = tm.time()
sze = []

# ---- Running extremes --------------------------------------------------
# NOTE(review): presumably highest/lowest error and the files they came
# from; their use is not visible in this excerpt.
Herror = 0.0
Wfile = ''

Lerror = 100.0
Bfile = ''

Nf = len(files)
cnt = 0

for i in files:
    cnt += 1

    xyz, typ, Eact_W, readf = gt.readncdat(dtdir + i)

    # Coordinates as float32, one (len(typ), 3) slab per leading entry
    xyz = np.asarray(xyz, dtype=np.float32)
    xyz = xyz.reshape((xyz.shape[0], len(typ), 3))

    # Skip unreadable files and files without any entries
    if readf and xyz.shape[0] > 0:
        print('FILE: ' + str(cnt) + ' of ' + str(Nf) + ' ' + i)

        Nm = xyz.shape[0]
        Na = xyz.shape[1]

        # Total element count over the first two axes, and how many chunks
        # of at most 65000 that makes
        Nat = Na * Nm
        Nit = int(np.ceil(Nat/65000.0))
# NOTE(review): whitespace-collapsed excerpt of nested loops. The original
# nesting of the statements that follow the try/except cannot be recovered
# from this text, so the code is left untouched.
# Visible behaviour: for each directory d in dtdirs, file stems are built by
# dropping the last '_'-separated field of every listdir entry and
# de-duplicating through a set. For each stem i and suffix k in namelist,
# gt.readncdat(d+i+k) is timed and the elapsed time is added to totaltime.
# On FileNotFoundError the read is replaced by zero-length arrays shaped from
# shapesarr, which is only assigned after a successful read -- TODO confirm
# it is bound before the first failure can occur.
# The first array's shape is unpacked as (ncsub, nat, ndim) and the array is
# flattened to (ncsub*nat, ndim) before being appended to allarr.
for d in dtdirs: tmp = listdir(d) files = list({("_".join(f.split("_")[:-1])) for f in tmp}) allarr = [] Natoms = [] Nconfs = [] typarr = [] for i in files: print(d+i) nc = 0 for k in namelist: try: _timeloop = tm.time() readarrays = gt.readncdat(d+i+k) _timeloop2 = (tm.time() - _timeloop) totaltime += _timeloop2 #print('Computation complete. Time: ' + "{:.4f}".format(_timeloop2) + 'ms') shapesarr = [x.shape for x in readarrays] typ = readarrays[1] except FileNotFoundError: readarrays = [np.zeros((0,*x[1:])) for x in shapesarr] ncsub, nat, ndim = readarrays[0].shape nc += ncsub readarrays[0] = readarrays[0].reshape(ncsub*nat,ndim) allarr.append(readarrays) Natoms.append(nat)
# ---- Input files, network 1 --------------------------------------------
cnstfile1 = wkdir1 + 'rHCNO-4.6A_32-3.1A_a8-8.params'
saefile1 = wkdir1 + 'sae_6-31gd.dat'
nnfdir1 = wkdir1 + 'networks/'

# ---- Input files, network 2 --------------------------------------------
cnstfile2 = wkdir2 + 'rHCNO-4.6A_32-3.1A_a8-8.params'
saefile2 = wkdir2 + 'sae_6-31gd.dat'
nnfdir2 = wkdir2 + 'networks/'

# ---- Network objects ---------------------------------------------------
nc1 = pync.pyNeuroChem(cnstfile1, saefile1, nnfdir1, 0)
nc2 = pync.pyNeuroChem(cnstfile2, saefile2, nnfdir2, 0)

# ---- Reference structures and energies ---------------------------------
xyz, typ, Eact = gt.readncdat(
    '/home/jujuman/Dropbox/Research/ChemSciencePaper/TestCases/C10H20Isomers/isomer_structures_DFT.dat'
)
Eact = np.array(Eact)

# ---- Both networks get the same conformers -----------------------------
nc1.setConformers(confs=xyz, types=typ)
nc2.setConformers(confs=xyz, types=typ)

# Counts are reported from network 1 only
print('Number of Atoms Loaded: ' + str(nc1.getNumAtoms()))
print('Number of Confs Loaded: ' + str(nc1.getNumConfs()))

# ---- Start timing the first energy evaluation --------------------------
print('Computing energies 1...')
_t1b = tm.time()
# NOTE(review): whitespace-collapsed excerpt. The first three statements
# (sort xy by its first element, unzip into X and Y, return np.array(Y)) are
# the tail of a function whose def line is not visible here, so the code is
# left untouched.
# The remaining top-level statements build the network file paths under
# wkdir1, read isomer_structures_DFT.dat from dtdir with gt.readncdat, cast
# xyz to float32, reshape it to (xyz.shape[0], len(typ), 3) and convert Eact
# to an ndarray.
xy = sorted(xy, key=lambda x: x[0]) X, Y = zip(*xy) return np.array(Y) # Set required files for pyNeuroChem wkdir1 = '/home/jujuman/Research/GDB-11-wB97X-6-31gd/dataset_size_testing/train_08_1.00/' #Network 1 Files cnstfile1 = wkdir1 + 'rHCNO-4.5A_32-3.1A_a8-8.params' saefile1 = wkdir1 + 'sae_6-31gd.dat' nnfdir1 = wkdir1 + 'networks/' dtdir = '/home/jujuman/Dropbox/ChemSciencePaper.AER/TestCases/C10H20Isomers/' #xyz,typ,Eact = gt.readncdat('../data_irc.dat') xyz,typ,Eact,tmp = gt.readncdat(dtdir + 'isomer_structures_DFT.dat') #xyz2,typ2,Eact2,tmp = gt.readncdat(dtdir + 'isomer_structures_DFTB.dat') #xyz3,typ3,Eact3,tmp = gt.readncdat(dtdir + 'isomer_structures_PM6.dat') #xyz = [xyz[0],xyz[1]] #xyz2 = [xyz2[0],xyz2[1]] #xyz3 = [xyz3[0],xyz3[1]] xyz = np.asarray(xyz,dtype=np.float32) xyz = xyz.reshape((xyz.shape[0],len(typ),3)) Eact = np.array(Eact) #Eact2 = np.array(Eact2) #Eact3 = np.array(Eact3) # Construct pyNeuroChem classes
print('Computing coulomb matrices...')
_t1b = tm.time()

cnt = 0

# A single output file receives the matrices of every input file.
# NOTE(review): the matching close() is not visible in this excerpt.
output_file = open(cmdfile + '.dat', 'wb')

for i in files:
    cnt += 1
    print('FILE: ' + str(cnt) + ' of ' + str(len(files)) + ' ' + i)

    # Full path of the current input file
    file = dtdir + i

    # Read the training molecules
    xyz_tr, typ_tr, Eact_tr, readf = gt.readncdat(file, np.float32)

    # Keep only the leading fraction P of the entries
    xyz_tr = xyz_tr[:int(P * xyz_tr.shape[0])]
    Eact_tr = Eact_tr[:int(P * Eact_tr.shape[0])]

    # Energy of the atoms at infinite separation, removed from the energies
    ise = cm.computerISE(typ_tr)
    Eact_tr = Eact_tr - ise

    Nm = Eact_tr.shape[0]

    # Build the matrix data; column N*N receives the shifted energies
    cmat = cm.GenCMatData(xyz_tr, typ_tr, N)
    cmat[:, N * N] = Eact_tr

    cmat.tofile(output_file)