def prepare_confs_iso(self):
    """Regroup conformer XYZ files by composition and rewrite them per group.

    Every ``*.xyz`` file in ``<ldtdir><datdir>/confs/`` is read, its atoms are
    reordered so that the species labels are sorted, and the joined, sorted
    labels are used as a grouping key. For each input file one line
    ``<file> <formula> <index within group> <number of structures>`` is
    written to ``<ldtdir><datdir>/info_confstoiso_map.dat``.

    The structures of each group are then stacked and written to
    ``<ldtdir><datdir>/confs_iso/``. Groups with fewer than 96 structures go
    into a single file; larger groups are split into roughly equal chunks
    with ``np.array_split``.
    """
    prefix = 'ad'
    max_per_file = 96

    base_dir = self.ldtdir + self.datdir
    confs_dir = base_dir + '/confs/'
    isoms_dir = base_dir + '/confs_iso/'

    files = [f for f in os.listdir(confs_dir)
             if f.rsplit('.', maxsplit=1)[-1] == 'xyz']

    # Maps the sorted species string to the list of (coordinates, species)
    # tuples that share that composition.
    ds = dict()

    # The context manager guarantees the map file is closed even when one of
    # the input files cannot be parsed.
    with open(base_dir + '/info_confstoiso_map.dat', 'w') as of:
        for f in files:
            X, S, N, C = hdt.readxyz2(confs_dir + f)

            # Bring the atoms into a canonical (sorted by species) order so
            # that equal compositions yield equal keys.
            S = np.array(S)
            idx = sorted(range(len(S)), key=lambda k: S[k])
            S = S[np.array(idx)]
            for j, x in enumerate(X):
                X[j] = x[idx]

            key = "".join(S)
            group = ds.setdefault(key, [])

            # len(group) is 0 for the first file of a composition and the
            # running index for every later one.
            of.write(f + ' ' + convert_eformula(S) + ' ' + str(len(group)) +
                     ' ' + str(X.shape[0]) + '\n')
            group.append((X, S))

    for group in ds.values():
        X = np.vstack([entry[0] for entry in group])
        S = list(group[0][1])
        N = X.shape[0]
        formula = convert_eformula(S)

        if N < max_per_file:
            fn = prefix + '_' + formula + '-' + str(N).zfill(3) + '.xyz'
            hdt.writexyzfile(isoms_dir + '/' + fn, X, S)
        else:
            Nsplit = int(math.ceil(N / float(max_per_file)))
            for l, x in enumerate(np.array_split(X, Nsplit)):
                fn = (prefix + '_' + formula + '_' + str(l).zfill(2) + '-' +
                      str(x.shape[0]).zfill(3) + '.xyz')
                hdt.writexyzfile(isoms_dir + '/' + fn, x, S)
def TS_sampling(self, tid, TS_infiles, tsparams, gpuid):
    """Run repeated MD sampling jobs per input file and save flagged structures.

    For every file in ``TS_infiles`` the sampler's ``run_md`` is called
    ``tsparams['n_samples']`` times. The structure returned by each run that
    reports ``fail`` is collected, and the collected structures are written
    to ``self.cdir`` under the input file's base name. Per-job results and a
    per-file summary are written to
    ``<ldtdir><datdir>/info_tssampler-<tid>.nfo``.

    Args:
        tid: Identifier used in the name of the info file.
        TS_infiles: Input file paths. The ``nmfile`` handed to ``run_md`` is
            the same path with its extension replaced by ``.log``.
        tsparams: Dict providing the keys ``T``, ``sig``, ``n_samples``,
            ``n_steps``, ``steps``, ``min_steps``, ``normalmode`` and
            ``displacement``.
        gpuid: Forwarded to ``aat.MD_Sampler``.
    """
    activ = aat.MD_Sampler(TS_infiles, self.netdict['cnstfile'],
                           self.netdict['saefile'],
                           self.netdict['nnfprefix'],
                           self.netdict['num_nets'], gpuid)

    T = tsparams['T']
    sig = tsparams['sig']
    Ns = tsparams['n_samples']
    n_steps = tsparams['n_steps']
    steps = tsparams['steps']
    min_steps = tsparams['min_steps']
    nm = tsparams['normalmode']
    displacement = tsparams['displacement']

    # Avoid a ZeroDivisionError in the summary when no samples are requested.
    denom = float(Ns) if Ns else 1.0

    nfo_path = (self.ldtdir + self.datdir + '/info_tssampler-' + str(tid) +
                '.nfo')
    with open(nfo_path, 'w') as difo:
        for f in TS_infiles:
            X = []
            S = None
            ftme_t = 0.0
            fail_count = 0
            sumsig = 0.0

            for i in range(Ns):
                x, S, t, stddev, fail, temp = activ.run_md(
                    f,
                    T,
                    steps,
                    n_steps,
                    nmfile=f.rsplit(".", 1)[0] + '.log',
                    displacement=displacement,
                    min_steps=min_steps,
                    sig=sig,
                    nm=nm)
                sumsig += stddev
                if fail:
                    difo.write('Job ' + str(i) + ' failed in ' +
                               "{:.2f}".format(t) + 'fs Sigma: ' +
                               "{:.2f}".format(stddev) + ' SetTemp: ' +
                               "{:.2f}".format(temp) + '\n')
                    X.append(x[np.newaxis, :, :])
                    fail_count += 1
                else:
                    difo.write('Job ' + str(i) + ' succeeded.\n')
                ftme_t += t

            print('Complete mean fail time: ' +
                  "{:.2f}".format(ftme_t / denom) + ' failed ' +
                  str(fail_count) + '/' + str(Ns) + '\n')
            difo.write('Complete mean fail time: ' +
                       "{:.2f}".format(ftme_t / denom) + ' failed ' +
                       str(fail_count) + '/' + str(Ns) + ' MeanSig: ' +
                       "{:.2f}".format(sumsig / denom) + '\n')

            # np.vstack raises ValueError on an empty list, so only write an
            # output file when at least one run was flagged.
            if X:
                hdt.writexyzfile(self.cdir + os.path.basename(f),
                                 np.vstack(X), S)

    del activ
def mol_dyn_sampling(self, md_work, i, N, T1, T2, dt, Nc, Ns, sig, gpuid):
    """Generate MD conformations for each molecule file in ``md_work``.

    Each entry of ``md_work`` is read with ``hdt.read_rcdb_coordsandnm``,
    loaded into an ``aat.moldynactivelearning`` sampler and passed through
    ``generate_conformations``. Non-empty results are written to
    ``self.cdir`` as ``mds_<name>_<i:02d><index:04d>.xyz``. Progress and the
    mean fail time are logged to
    ``<ldtdir><datdir>/info_data_mdso-<i>.nfo``.

    Args:
        md_work: Array-like of input file paths (its ``size`` attribute is
            used, so a NumPy array is expected).
        i: Worker/task index used in the info and output file names.
        N, T1, T2, dt, Nc, Ns: Forwarded unchanged to
            ``generate_conformations``.
        sig: Forwarded to ``generate_conformations`` as ``dS``.
        gpuid: Forwarded to ``aat.moldynactivelearning``.
    """
    activ = aat.moldynactivelearning(self.netdict['cnstfile'],
                                     self.netdict['saefile'],
                                     self.netdict['nnfprefix'],
                                     self.netdict['num_nets'], gpuid)

    Nmol = md_work.size
    ftme_t = 0.0

    nfo_path = (self.ldtdir + self.datdir + '/info_data_mdso-' + str(i) +
                '.nfo')
    with open(nfo_path, 'w') as difo:
        difo.write('MD Sampler running: ' + str(Nmol) + '\n')

        for di, path in enumerate(md_work):
            data = hdt.read_rcdb_coordsandnm(path)
            S = data["species"]

            # Set mols
            activ.setmol(data["coordinates"], S)

            # Generate conformations
            X = activ.generate_conformations(N, T1, T2, dt, Nc, Ns, dS=sig)
            ftme_t += activ.failtime
            nfo = activ._infostr_

            # File name without directory and extension; [-1] also works for
            # a bare file name that contains no '/'.
            m = path.rsplit('/', 1)[-1].rsplit('.', 1)[0]

            difo.write(' -' + m + ': ' + nfo + '\n')
            difo.flush()

            if X.size > 0:
                hdt.writexyzfile(
                    self.cdir + 'mds_' + m.split('.')[0] + '_' +
                    str(i).zfill(2) + str(di).zfill(4) + '.xyz', X, S)

        # Guard against an empty work list, which would divide by zero.
        mean_fail = ftme_t / float(Nmol) if Nmol else 0.0
        difo.write('Complete mean fail time: ' + "{:.2f}".format(mean_fail) +
                   '\n')

    print(Nmol)
    del activ
# Convert every molecule of a DrugBank SDF file into an XYZ file and an
# input file written by hdt.write_rcdb_input.

# Destination root; XYZ files are written to its 'xyz/' subdirectory.
storedir = '/home/jujuman/Research/extensibility_test_sets/drugbank/'

# removeHs=False keeps the explicit hydrogens stored in the SDF file.
suppl = Chem.SDMolSupplier(
    '/home/jujuman/Dropbox/ChemSciencePaper.AER/Benchmark_Datasets/drugbank/drugbank_3d_1564.sdf',
    removeHs=False)

for id, m in enumerate(suppl):
    # The supplier yields None for entries it could not parse; skip those.
    if m is None:
        continue

    name = m.GetProp('_Name')

    # Coordinates and species of the molecule; xyz[0] is used below as a
    # single structure, so xyz presumably has a leading conformer axis.
    xyz, spc = pya.__convert_rdkitconfs_to_nparr__(m)

    print(xyz.shape)
    print(name, id, spc)

    # The position in the SDF file (zero-padded to 4 digits) names the output.
    hdt.writexyzfile(storedir + 'xyz/drugbank_' + str(id).zfill(4) + '.xyz',
                     xyz, spc)
    hdt.write_rcdb_input(xyz[0],
                         spc,
                         id,
                         storedir,
                         'drugbank',
                         10,
                         'wb97x/6-31g*',
                         '300.0',
                         fill=4,
                         comment='Name: ' + name)

    # Print size
    #print(m.GetNumAtoms(), m.GetNumHeavyAtoms())
def plot_irc(axes, i, d, f):
    """Plot a reference IRC profile together with two ensemble predictions.

    Reads the IRC file ``d + f`` with ``pyg.read_irc``, writes its structures
    to ``c + <file stem>.xyz`` and, when the reaction-coordinate table holds
    more than 10 values, evaluates the ensembles ``nc1`` and ``nc2`` on the
    structures, appends error statistics to module-level lists and draws the
    curves on ``axes``.

    This function relies on names defined outside of it: ``c`` (output
    directory prefix), ``nc1``/``nc2`` (iterables of network objects) and the
    result lists ``bar1``, ``bar2``, ``rmp1``, ``rmp2``, ``Ec1``, ``Ec2`` and
    ``Ea``, which are appended to.

    Args:
        axes: Matplotlib axes to draw on.
        i: Label index used in the printed line and in the legend entry of
            the first ensemble.
        d: Directory prefix of the IRC file.
        f: File name of the IRC file; also used as the plot title.
    """

    def _ensemble_energies(ensemble):
        # Energies (kcal/mol) predicted by every member of the ensemble.
        out = []
        for comp in ensemble:
            comp.setConformers(confs=xyz, types=list(spc))
            out.append(hdt.hatokcal * comp.energy())
        return out

    Eact, xyz, spc, Rc = pyg.read_irc(d + f)
    Eact = hdt.hatokcal * Eact

    # The first structure/energy and the last reaction-coordinate row are
    # dropped, presumably so that the arrays have matching lengths.
    xyz = xyz[1:]
    Eact = Eact[1:]
    Rc = Rc[:-1]

    hdt.writexyzfile(c + f.split('.')[0] + '.xyz', xyz, spc)

    if Rc.size <= 10:
        return

    energies1 = _ensemble_energies(nc1)
    energies2 = _ensemble_energies(nc2)

    # The model curves are plotted reversed, so the spread is reversed too.
    modl_std1 = np.std(energies1, axis=0)[::-1]
    energies1 = np.mean(np.vstack(energies1), axis=0)

    modl_std2 = np.std(energies2, axis=0)[::-1]
    energies2 = np.mean(np.vstack(energies2), axis=0)

    rmse1 = hdt.calculaterootmeansqrerror(energies1, Eact)
    rmse2 = hdt.calculaterootmeansqrerror(energies2, Eact)

    # Barrier: maximum relative to the first point.
    dba = Eact.max() - Eact[0]
    db1 = energies1.max() - energies1[0]
    db2 = energies2.max() - energies2[0]

    # First point relative to the last point.
    rpa = Eact[0] - Eact[-1]
    rp1 = energies1[0] - energies1[-1]
    rp2 = energies2[0] - energies2[-1]

    bar1.append(abs(db1 - dba))
    bar2.append(abs(db2 - dba))

    rmp1.append(abs(rpa - rp1))
    rmp2.append(abs(rpa - rp2))

    Ec1.append(energies1)
    Ec2.append(energies2)
    Ea.append(Eact)

    print(i, ')', f, ':', len(spc), ':', rmse1, rmse2, 'R/P1: ',
          abs(rpa - rp1), 'R/P2: ', abs(rpa - rp2), 'Barrier1:',
          abs(db1 - dba), 'Barrier2:', abs(db2 - dba))

    # Column 0 of Rc is used as energy, column 1 as the x coordinate.
    coord = Rc[:, 1]
    dft_curve = hdt.hatokcal * (Rc[:, 0] - Rc[:, 0][0])
    curve1 = energies1[::-1] - energies1[::-1][0]
    curve2 = energies2[::-1] - energies2[::-1][0]

    # Limit the x axis to the plotted coordinate column only; taking the
    # extrema over the whole table would mix energies into the x range.
    axes.set_xlim([coord.min(), coord.max()])
    axes.set_ylim([dft_curve.min() - 1.0, curve2.max() + 20.0])

    axes.plot(coord, dft_curve, color='Black', label='DFT')
    axes.errorbar(coord,
                  curve2,
                  yerr=modl_std2,
                  fmt='--',
                  color='red',
                  label="ANI-1: " + "{:.1f}".format(bar2[-1]),
                  linewidth=2)
    axes.errorbar(coord,
                  curve1,
                  yerr=modl_std1,
                  fmt='--',
                  color='blue',
                  label="[" + str(i) + "]: " + "{:.1f}".format(bar1[-1]),
                  linewidth=2)

    axes.legend(loc="upper left", fontsize=10)
    axes.set_title(str(f),
                   color='black',
                   fontdict={'weight': 'bold'},
                   x=0.8,
                   y=0.85)
El = np.vstack(El) #dEm = np.sum(El, axis=0) / 3.0 bad_idx = [] god_idx = [] for i in range(Ea.shape[0]): test = np.array([True if j > 1.0 else False for j in El[:,i]]) if np.all(test): bad_idx.append(i) else: god_idx.append(i) Nd = Nd + len(god_idx) Nb = Nb + len(bad_idx) if len(god_idx) != 0: # print(gn) dpack.store_data(data['parent'] + "/" + data['name'], coordinates=X[god_idx], energies=Ea[god_idx], species=S) if len(bad_idx) > 0: print(data['parent'],data['name'],'- Bad:',len(bad_idx),'of',len(god_idx)) for i in bad_idx: print('(', i, ')', ':' ,El[:,i]) hdn.writexyzfile(store_xyz+data['parent'] + '_' + data['name']+'.xyz',X[bad_idx],S) dpack.cleanup() print('Bad:',Nb,'of',Nd+Nb)
EANI = np.asarray(EANI).flatten() # Max if abs(E.max()) > maxi: maxi = abs(E.max()) # Min if abs(E.min()) < mini: mini = abs(E.min()) # df_E.append(E) df_Ea.append(EANI) #print (df_E) hdn.writexyzfile("coordlist.xyz", np.concatenate(coord_list), spec_list[0]) import matplotlib.pyplot as plt df_E = hdn.hatokcal * np.array(df_E).flatten() df_Ea = hdn.hatokcal * np.array(df_Ea).flatten() rmse = hdn.calculaterootmeansqrerror(df_E, df_Ea) x = list(range(0, df_E.shape[0])) plt.scatter(df_D, df_E, label='DFT') plt.scatter(df_D, df_Ea, color='red', label='ANI RMSE: ' + "{:.2f}".format(rmse) + 'kcal/mol',
sid = len(ds[id]) of.write(f + ' ' + convert_eformula(S) + ' ' + str(sid) + ' ' + str(X.shape[0]) + '\n') ds[id].append((X, S)) else: of.write(f + ' ' + convert_eformula(S) + ' ' + str(0) + ' ' + str(X.shape[0]) + '\n') ds.update({id: [(X, S)]}) of.close() #print(i,len(ds)) Nt = 0 for i in ds.keys(): X = [] S = [] for j in ds[i]: X.append(j[0]) S.append(j[1]) X = np.vstack(X) S = list(S[0]) N = X.shape[0] Nt += N print(type(S), S) fn = prefix + '_' + convert_eformula(S) + '-' + str(N).zfill(5) + '.xyz' print('Writing: ', fn) hdt.writexyzfile(ndir + fn, X, S) print('Total data:', Nt)
def __fragmentbox__(self, file, sig_cut):
    """Extract close-contact molecule pairs with high ensemble disagreement.

    Iterates over all unordered pairs of molecules in the box. A pair is
    evaluated when both centroids lie inside the box by at least their
    respective ``ctd[...][1]`` margin, the centroids are closer than
    ``di + dj + 6.0`` and the smallest inter-molecular atom distance lies
    strictly between 1.1 and 4.2. The pair is centred on its centroid and its
    energy is computed with every network in ``self.aens.ncl``; the standard
    deviation (kcal/mol) divided by ``sqrt(number of atoms)`` is compared
    with ``sig_cut``.

    Results are stored on the instance: ``self.Nt`` counts evaluated pairs,
    ``self.Nd`` counts pairs above the cut-off and ``self.frag_list`` holds
    ``{'coords', 'spec'}`` dicts for them. Each selected pair is also written
    to ``<file><i:04d>-<j:04d>.xyz``.

    Args:
        file: Path prefix for the XYZ files that are written.
        sig_cut: Threshold on the per-sqrt-atom energy standard deviation.
    """
    self.X = self.mol.get_positions()

    self.frag_list = []

    self.Nd = 0
    self.Nt = 0

    n_nets = len(self.aens.ncl)
    n_mols = len(self.Na)

    for i in range(n_mols):
        si = self.ctd[i][2]
        di = self.ctd[i][1]
        Nai = self.Na[i]
        Xi = self.X[si:si + Nai, :]
        Xci = np.sum(Xi, axis=0) / Nai

        # Skip molecules whose centroid is too close to the box boundary.
        if not (np.all(Xci > di) and np.all(Xci < self.L - di)):
            continue

        for j in range(i + 1, n_mols):
            sj = self.ctd[j][2]
            dj = self.ctd[j][1]
            Naj = self.Na[j]
            Xj = self.X[sj:sj + Naj, :]
            Xcj = np.sum(Xj, axis=0) / Naj

            if not (np.all(Xcj > dj) and np.all(Xcj < self.L - dj)):
                continue

            # Cheap centroid pre-filter before the all-pairs distance check.
            dc = np.linalg.norm(Xci - Xcj)
            if not dc < di + dj + 6.0:
                continue

            # Smallest atom-atom distance between the two molecules,
            # computed for all pairs at once and capped at 10.0.
            pair_d = np.linalg.norm(
                Xi[:, np.newaxis, :] - Xj[np.newaxis, :, :], axis=2)
            dmin = min(10.0, pair_d.min()) if pair_d.size else 10.0

            if not (dmin < 4.2 and dmin > 1.1):
                continue

            Xf = np.vstack([Xi, Xj])

            Sf = self.S[si:si + Nai]
            Sf.extend(self.S[sj:sj + Naj])

            # Centre the fragment on its centroid.
            Xcf = np.sum(Xf, axis=0) / (Nai + Naj)
            Xf = Xf - Xcf

            # One slot per network, so no uninitialised entry can leak into
            # the standard deviation when the ensemble size is not 5.
            E = np.empty(n_nets, dtype=np.float64)
            for k, nc in enumerate(self.aens.ncl):
                nc.setMolecule(coords=np.array(Xf, dtype=np.float32),
                               types=Sf)
                E[k] = nc.energy()[0]

            sig = np.std(hdn.hatokcal * E) / np.sqrt(Nai + Naj)

            self.Nt += 1
            if sig > sig_cut:
                self.Nd += 1
                hdn.writexyzfile(
                    file + str(i).zfill(4) + '-' + str(j).zfill(4) + '.xyz',
                    Xf.reshape(1, Xf.shape[0], 3), Sf)
                self.frag_list.append(dict({'coords': Xf, 'spec': Sf}))
mol.set_calculator(ANI(False)) mol.calc.setnc(nc) # Optimize molecule start_time = time.time() dyn = LBFGS(mol) dyn.run(fmax=C) print('[ANI Total time:', time.time() - start_time, 'seconds]') print(hdt.evtokcal * mol.get_potential_energy()) # Save optimized mol spc = mol.get_chemical_symbols() pos = mol.get_positions(wrap=True).reshape(1, len(spc), 3) hdt.writexyzfile(optfile, pos, spc) exit(0) # Open MD output mdcrd = open(xyzfile, 'w') # Open MD output traj = open(trajfile, 'w') # We want to run MD with constant energy using the Langevin algorithm # with a time step of 0.5 fs, the temperature T and the friction # coefficient to 0.02 atomic units. dyn = Langevin(mol, 0.1 * units.fs, T * units.kB, 0.005) # Run equilibration
import hdnntools as hdt
import pyanitools as pyt
import os

# Export every entry of an ANI HDF5 data set to its own XYZ file, grouped
# into one directory per parent group of the entry's path.

# Source data set and destination root directory.
file = '/home/jujuman/Research/DataReductionMethods/model6/model0.05me/ani_red_c06.h5'
sdir = '/home/jujuman/Research/GDB-11-AL-wB97x631gd/'

aload = pyt.anidataloader(file)
try:
    for data in aload:
        X = data['coordinates']
        S = data['species']
        P = data['path']

        # The path is split on '/': element 1 names the parent group and
        # element 2 is expected to look like '...mol<index>'.
        parts = P.split('/')
        parent = parts[1]
        index = parts[2].split('mol')[1].zfill(7)

        path = sdir + parent
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(path, exist_ok=True)

        print(path + '/' + parent + '-' + index + '.xyz', 'DATA:', X.shape[0])
        hdt.writexyzfile(path + '/' + parent + '-' + index + '.xyz', X, S)
finally:
    # Release the loader's file handle even if an entry fails to export.
    aload.cleanup()
xyz = data["coordinates"] nmc = data["nmdisplacements"] frc = data["forceconstant"] nms = nmt.nmsgenerator(xyz, nmc, frc, spc, T, minfc=1.0E-2) conformers = [] for i in range(Ngen): conformers.append(nms.get_random_structure()) conformers = np.stack(conformers) nc.setConformers(confs=conformers, types=list(spc)) Ecmp = nc.energy() # this generates AEVs aevs = np.empty([Ngen, len(rcatoms) * aevsize]) for m in range(Ngen): for j, a in enumerate(rcatoms): aevs[m, j * aevsize:(j + 1) * aevsize] = nc.atomicenvironments( a, m).copy() dm = scispc.distance.pdist(aevs, 'sqeuclidean') picker = rdSimDivPickers.MaxMinPicker() seed_list = [i for i in range(Ngen)] np.random.shuffle(seed_list) print('seed:', seed_list) ids = list(picker.Pick(dm, Ngen, Nkep, firstPicks=list(seed_list[0:5]))) ids.sort() print(f, len(ids), conformers.shape, dm.shape, ":", ids) hdn.writexyzfile(cdir + f.split('.')[0] + '.xyz', conformers[ids], spc) of.close()
psi.append(i.split('_')[2].split('.')[0]) data = hdt.readxyz2(dir + i) print(data[0]) print(data[1]) xyz.append(data[0]) nc.setMolecule(coords=data[0][0], types=list(data[1])) Eact.append(nc.energy()[0]) xyz = np.vstack(xyz) #print(xyz) hdt.writexyzfile( '/home/jujuman/Dropbox/ChemSciencePaper.AER/JustinsDocuments/ACS_april_2017/DipeptidePhiPsi/data.xyz', xyz, list(data[1])) data2 = hdt.readncdat( '/home/jujuman/Dropbox/ChemSciencePaper.AER/JustinsDocuments/ACS_april_2017/DipeptidePhiPsi/data.dat', type=np.float32) Eact = np.array(Eact) Edft = data2[2] z = np.array(hdt.hatokcal * (Eact - Eact.min()), dtype=np.float32).reshape(x.shape[0], x.shape[0]) z2 = np.array(hdt.hatokcal * (Edft - Edft.min()), dtype=np.float32).reshape(x.shape[0], x.shape[0]) rmse = hdt.calculaterootmeansqrerror(z, z2)
def pDyn_QMsampling(self, pdynparams, gpuid):
    """Run the pDynamo OPT -> TS -> IRC chain and sample around IRC points.

    Steps, in order:
      1. Write the three pDynamo input files and run the OPT, TS and IRC
         commands; each later command only runs when the previous one
         returned 0.
      2. Convert every file in ``IRCdir`` to XYZ files in ``indir``.
      3. For every file in ``indir`` whose ``check_stddev`` value exceeds
         ``sig``, fetch normal modes with ``get_nm``; then write the
         collected structures to ``XYZfile``.
      4. Read ``XYZfile`` back and, for every structure / mode combination,
         generate 500 random structures and write them to ``self.cdir``.

    NOTE(review): the nesting of the statements in steps 3 and 4 was
    reconstructed from whitespace-collapsed source - confirm against the
    original file.

    Args:
        pdynparams: Dict with the keys read below (``pDyn_dir``, ``num_rxn``,
            ``logfile_OPT``, ``logfile_TS``, ``logfile_IRC``,
            ``sbproc_cmdOPT``, ``sbproc_cmdTS``, ``sbproc_cmdIRC``,
            ``IRCdir``, ``indir``, ``XYZfile``, ``l_val``, ``h_val``,
            ``n_points``, ``sig``, ``N``, ``wkdir``, ``cnstfilecv``,
            ``saefilecv``, ``Nnt``).
        gpuid: Forwarded to ``aat.subproc_pDyn``.
    """
    #Call subproc_pDyn class in pyaniasetools as activ
    activ = aat.subproc_pDyn(self.netdict['cnstfile'],
                             self.netdict['saefile'],
                             self.netdict['nnfprefix'],
                             self.netdict['num_nets'], gpuid)

    pDyn_dir = pdynparams['pDyn_dir']  #Folder to write pDynamo input file
    num_rxn = pdynparams['num_rxn']  #Number of input rxn
    logfile_OPT = pdynparams['logfile_OPT']  #logfile for FIRE OPT output
    logfile_TS = pdynparams['logfile_TS']  #logfile for ANI TS output
    logfile_IRC = pdynparams['logfile_IRC']  #logfile for ANI IRC output
    sbproc_cmdOPT = pdynparams[
        'sbproc_cmdOPT']  #Subprocess commands to run pDynamo
    sbproc_cmdTS = pdynparams['sbproc_cmdTS']
    sbproc_cmdIRC = pdynparams['sbproc_cmdIRC']
    IRCdir = pdynparams['IRCdir']  #path to get pDynamo saved IRC points
    indir = pdynparams[
        'indir']  #path to save XYZ files of IRC points to check stddev
    XYZfile = pdynparams[
        'XYZfile']  #XYZ file with high standard deviations structures
    l_val = pdynparams[
        'l_val']  #Ri --> randomly perturb in the interval [+x,-x]
    h_val = pdynparams['h_val']
    n_points = pdynparams[
        'n_points']  #Number of points along IRC (forward+backward+1 for TS)
    sig = pdynparams['sig']
    # NOTE(review): this value is overwritten with 500 inside the sampling
    # loop below, so the configured 'N' currently has no effect.
    N = pdynparams['N']
    wkdir = pdynparams['wkdir']
    cnstfilecv = pdynparams['cnstfilecv']
    saefilecv = pdynparams['saefilecv']
    Nnt = pdynparams['Nnt']

    # --------------------------------- Run pDynamo ---------------------------
    # auto-TS ---> FIRE constraint OPT of core C atoms ---> ANI TS ---> ANI IRC
    activ.write_pDynOPT(num_rxn, pDyn_dir, wkdir, cnstfilecv, saefilecv,
                        Nnt)  #Write pDynamo input file in pDyndir
    activ.write_pDynTS(num_rxn, pDyn_dir, wkdir, cnstfilecv, saefilecv, Nnt)
    activ.write_pDynIRC(num_rxn, pDyn_dir, wkdir, cnstfilecv, saefilecv, Nnt)

    chk_OPT = activ.subprocess_cmd(sbproc_cmdOPT, False, logfile_OPT)
    if chk_OPT == 0:  #Wait until previous subproc is done!!
        chk_TS = activ.subprocess_cmd(sbproc_cmdTS, False, logfile_TS)
        if chk_TS == 0:
            # The IRC return code is stored but not inspected afterwards.
            chk_IRC = activ.subprocess_cmd(sbproc_cmdIRC, False, logfile_IRC)

    # ----------------------- Save points along ANI IRC ------------------------
    # Sorted so that the files are processed in a deterministic order.
    IRCfils = os.listdir(IRCdir)
    IRCfils.sort()
    for f in IRCfils:
        activ.getIRCpoints_toXYZ(n_points, IRCdir + f, f, indir)
    infils = os.listdir(indir)
    infils.sort()

    # ------ Check for high standard deviation structures and get vib modes -----
    # NOTE(review): nmc is only bound when at least one file exceeds sig;
    # otherwise the sampling loop below raises NameError.
    for f in infils:
        stdev = activ.check_stddev(indir + f, sig)
        if stdev > sig:  #if stddev is high then get modes for that point
            nmc = activ.get_nm(indir + f)  #save modes in memory for later use
    activ.write_nm_xyz(
        XYZfile
    )  #writes all the structures with high standard deviations to xyz file

    # ----------------------------- Read XYZ for NM -----------------------------
    X, spc, Na, C = hdt.readxyz3(XYZfile)

    # --------- NMS for XYZs with high stddev --------
    for i in range(len(X)):
        for j in range(len(nmc)):
            gen = nmt.nmsgenerator_RXN(
                X[i], nmc[j], spc[i], l_val,
                h_val)  # xyz,nmo,fcc,spc,T,Ri_-x,Ri_+x,minfc = 1.0E-3

            N = 500
            gen_crd = np.zeros((N, len(spc[i]), 3), dtype=np.float32)
            for k in range(N):
                gen_crd[k] = gen.get_random_structure()

            # NOTE(review): the file name depends only on N, so every (i, j)
            # combination writes to the same 'nms_TS500.xyz' path.
            hdt.writexyzfile(self.cdir + 'nms_TS%i.xyz' % N, gen_crd, spc[i])
    del activ
dtype=np.float32).reshape(xyz.shape[0], 3) #if conv: # print('Failed to converge!!!') energies = np.zeros((Nnc), dtype=np.float64) #------------ CV NETWORKS 1 ----------- N = 0 for comp in nc1: comp.setMolecule(coords=xyz, types=list(spc)) energies[N] = comp.energy()[0] N = N + 1 if np.std(hdt.hatokcal * energies) > 5.0: hdt.writexyzfile( '/home/jujuman/Research/CrossValidation/GDB-06-High-sdev/CV1bmol-' + str(total_mol) + '.xyz', xyz.reshape(1, xyz.shape[0], xyz.shape[1]), spc) total_bad = total_bad + 1 perc = int(100.0 * total_bad / float(total_mol)) output = ' ' + str(k) + ' ' + str(total_bad) + '/' + str( total_mol) + ' ' + str(perc) + '% (' + str( Na) + ') : stps=' + str(stps) + ' : ' + str( energies) + ' : std(kcal/mol)=' + str( np.std(hdt.hatokcal * energies)) + ' : ' + Chem.MolToSmiles(m) if np.std(hdt.hatokcal * energies) > 5.0: print("CV1:", output) f1.write(output + '\n')
if not ('F' in Chem.MolToSmiles(m)): print(n,') Working on',l,Chem.MolToSmiles(m),'...') # Add hydrogens m = Chem.AddHs(m) # generate Nc conformers cids = AllChem.EmbedMultipleConfs(m, N, useRandomCoords=True) # Classical Optimization for cid in cids: _ = AllChem.MMFFOptimizeMolecule(m, confId=cid, maxIters=250) # Set mols activ.setrdkitmol(m,cids) # Generate conformations X = activ.generate_conformations(N, T, dt, 2000, 10, dS = 0.34) nfo = activ._infostr_ difo.write(' -'+l+': '+nfo+'\n') print(nfo) difo.flush() # Set chemical symbols Xi, S = pya.__convert_rdkitmol_to_nparr__(m) # Store structures hdt.writexyzfile(sdir+l+'-'+str(n).zfill(2)+'.xyz', X, S) difo.close()
Nnc = 2 files = os.listdir(d) files.sort() #print(files) # Construct pyNeuroChem classes nc1 = [pync.conformers(wkdir1 + cnstfile, wkdir1 + saefile, wkdir1 + 'train' + str(l) + '/networks/', 0, False) for l in range(Nnc)] comp_xyz = [] for f in files: #print(f) Eact, xyz, spc, Rc = pyg.read_irc(d+f) s_idx = f.split('IRC')[1].split('.')[0] hdt.writexyzfile(c+f.split('.')[0]+'.xyz',xyz,spc) #print(f.split('IRC')[1].split('.')[0],Rc.shape) if Rc.size > 10: #------------ CV NETWORKS 1 ----------- energies = [] N = 0 for comp in nc1: comp.setConformers(confs=xyz, types=list(spc)) energies.append(hdt.hatokcal*comp.energy()) N = N + 1 energies = np.vstack(energies) modl_std = np.std(energies[::-1],axis=0) / float(len(spc)) bad_cnt = 0 bad_xyz = []
nms = nmt.nmsgenerator(xyz,nmc,frc,spc,T,minfc=5.0E-2) conformers = nms.get_Nrandom_structures(Ngen) ids = dc.get_divconfs_ids(conformers, spc, Ngen, Nkep, atmlist) conformers = conformers[ids] print(' -',f,len(ids),conformers.shape) sigma = anicv.compute_stddev_conformations(conformers,spc) #print(sigma) sid = np.where( sigma > 0.0 )[0] print(' -', fi, 'of', len(files), ') File:', f, 'keep:', sid.size,'percent:',"{:.2f}".format(100.0*sid.size/Ngen)) Nt += sigma.size Nk += sid.size if 100.0*sid.size/float(Ngen) > 0: Nkp += sid.size cfn = f.split('.')[0].split('-')[0]+'_'+str(idx).zfill(5)+'-'+f.split('.')[0].split('-')[1]+'.xyz' hdn.writexyzfile(cdir+cfn,conformers[sid],spc) idx += 1 Nkt += Nk Ntt += Nt of.write(' -Total: '+str(Nk)+' of '+str(Nt)+' percent: '+"{:.2f}".format(100.0*Nk/Nt)+'\n') of.flush() print(' -Total:',Nk,'of',Nt,'percent:',"{:.2f}".format(100.0*Nk/Nt)) of.write('\nGrand Total: '+ str(Nkt)+ ' of '+ str(Ntt)+' percent: '+"{:.2f}".format(100.0*Nkt/Ntt)+ ' Kept '+str(Nkp)+'\n') print('\nGrand Total:', Nkt, 'of', Ntt,'percent:',"{:.2f}".format(100.0*Nkt/Ntt), 'Kept',Nkp) of.close()
dtdir = '/home/jujuman/Dropbox/IRC_DBondMig/Benzene_rxn2/rxn_ben'+str(n)+'/IRC.log' #en, cd, ty = pyg.get_irc_data(dtdir+'IRC_fwd.log',dtdir+'IRC_bck.log',dtdir+'saddle_ts.log') en, cd, ty, Rc = pyg.read_irc(dtdir) np.savez(wkdir+'reaction_coordinate.npz',x=Rc) Na = len(ty) Nc = en.shape[0] print (cd.shape,' ',en.shape) #for i,x in enumerate(cd): #hdt.write_rcdb_input(x,ty,i,wkdir,fpf,5,'wb97x/6-31g*','600.0',opt='0') hdt.writexyzfile(wkdir+'irc.xyz',cd,ty) f = open(wkdir+'irc.dat','w') f.write("comment\n") f.write(str(Nc)+'\n') f.write(str(Na)+',') for j in ty: f.write(j+',') f.write('\n') mol = 0 for l,i in enumerate(cd): for j in i: for k in j: f.write(str(k)+',') f.write('') f.write(str(en[l]) + ',' + '\n')
import hdnntools as hdn import pyNeuroChem as pync import matplotlib.pyplot as plt file = "/home/jujuman/Research/ANI-DATASET/rxn_db_mig.h5" al = pyt.anidataloader(file) al.totalload() data = al.get_all_data() al.cleanup() df_E = hdn.hatokcal * data[1].flatten() xyz = data[0].reshape(df_E.shape[0], len(data[2][0]), 3) hdn.writexyzfile('/home/jujuman/crds.xyz', xyz, data[2][0]) # Set required files for pyNeuroChem #wkdir = '/home/jujuman/Dropbox/ChemSciencePaper.AER/ANI-c08e-ntwk/' wkdir = '/home/jujuman/Research/GDB-11-wB97X-6-31gd/ANI-c08e-ntwk_newtrain/' cnstfile = wkdir + 'rHCNO-4.6A_16-3.1A_a4-8.params' saefile = wkdir + 'sae_6-31gd.dat' nnfdir = wkdir + 'networks/' # Construct pyNeuroChem class mol = pync.conformers(cnstfile, saefile, nnfdir, 0) mol.setConformers(confs=xyz, types=list(data[2][0])) E = hdn.hatokcal * mol.energy()
#Nd += len(X) #Nt += Nu Nt += 1 if len(X) > 0: Nd += 1 X = np.stack(X) x = X[0].reshape(1, X.shape[1], 3) # keep only the first guy print(' -kept', len(x), 'of', Nu) if len(X) > 0: S = gdb.get_symbols_rdkitmol(m) hdn.write_rcdb_input(x[0], S, int(id), wdir, fpf, 100, LOT, '500.0', fill=4, comment='smiles: ' + Chem.MolToSmiles(m) + ' GDB-ID: ' + str(ridx)) hdn.writexyzfile(wdir + fpf + '-' + str(id).zfill(4) + '.xyz', x.reshape(1, x.shape[1], x.shape[2]), S) #print(str(id).zfill(8)) molnfo.close() print('Total mols:', Nd, 'of', Nt, 'percent:', "{:.2f}".format(100.0 * Nd / float(Nt)))
mol = Atoms(spc, xyz, calculator=ANI(False)) mol.calc.setnc(nc) LBFGS(mol).run(fmax=0.00001) vib = Vibrations(mol) vib.run() vib.summary() print(xyz) xyz = mol.get_positions().copy() print(xyz) nm_cr = vib.modes[6:] Nf = 3 * len(spc) - 6 nmo = nm_cr.reshape(Nf, len(spc), 3) fcc = np.array([ 1.314580, 1.3147106, 1.3149728, 1.5161799, 1.5164505, 5.6583018, 6.7181139, 6.7187967, 6.7193842 ]) gen = nm.nmsgenerator(xyz, nmo, fcc, spc, 2000.0) N = 2000 gen_crd = np.zeros((N, len(spc), 3), dtype=np.float32) for i in range(N): gen_crd[i] = gen.get_random_structure() hdt.writexyzfile('pynmstesting.xyz', gen_crd, spc)
#dir = '/home/jujuman/Research/GDB-11-wB97X-6-31gd/dnnts_rxns/scans_double_bond_migration/' #dir = '/home/jujuman/Dropbox/IRC_DBondMig/rxn1/' dir = '/home/jujuman/Dropbox/IRC_DBondMig/rxn5/' filef = dir + 'IRC_fwd.log' fileb = dir + 'IRC_bck.log' dataf = g09.read_irc(filef) datab = g09.read_irc(fileb) xyz = np.concatenate([np.flipud(datab[1]), dataf[1]]) eng = hdn.hatokcal * np.concatenate([np.flipud(datab[0]), dataf[0]]) print(xyz.shape) hdn.writexyzfile(dir + 'scan.xyz', xyz, dataf[2]) # Set required files for pyNeuroChem # Set required files for pyNeuroChem anipath = '/home/jujuman/Dropbox/ChemSciencePaper.AER/ANI-c08e-ntwk' cnstfile = anipath + '/rHCNO-4.6A_16-3.1A_a4-8.params' saefile = anipath + '/sae_6-31gd.dat' nnfdir = anipath + '/networks/' # Construct pyNeuroChem class nc = pync.conformers(cnstfile, saefile, nnfdir, 0) # Set the conformers in NeuroChem nc.setConformers(confs=xyz, types=dataf[2])
ftme_t = 0.0 for n, m in enumerate(files[0:int(id[0] * len(files))]): data = hdn.read_rcdb_coordsandnm(id[1] + m) S = data["species"] print(n, ') Working on', m, '...') # Set mols activ.setmol(data["coordinates"], S) # Generate conformations X = activ.generate_conformations(N, T1, T2, dt, Nc, Ns, dS=0.25) ftme += activ.failtime ftme_t += activ.failtime nfo = activ._infostr_ difo.write(' -' + m + ': ' + nfo + '\n') difo.flush() print(nfo) if X.size > 0: hdn.writexyzfile( dstore + 'mds_' + m.split('.')[0] + '_' + str(di).zfill(4) + '.xyz', X, S) difo.write('Class mean fail time: ' + "{:.2f}".format(ftme_t / float(len(files))) + '\n') difo.write('Complete mean fail time: ' + "{:.2f}".format(ftme / float(Nmol)) + '\n') print(Nmol) difo.close()
def normal_mode_sampling(self, T, Ngen, Nkep, sig, gpuid):
    """Normal-mode sample every input molecule and keep uncertain conformers.

    For each file in each directory of ``self.idir`` this generates ``Ngen``
    random structures with ``nmt.nmsgenerator``, reduces them with
    ``get_divconfs_ids`` (called with ``Nkep``), and keeps the conformers
    whose ``compute_stddev_conformations`` value exceeds ``sig``. Kept
    conformers are written to ``self.cdir``; per-directory and grand totals
    are logged to ``<ldtdir><datdir>/info_data_nms.nfo``.

    Args:
        T: Forwarded to ``nmt.nmsgenerator``.
        Ngen: Number of random structures generated per molecule.
        Nkep: Forwarded to ``get_divconfs_ids``.
        sig: Threshold on the ensemble standard deviation.
        gpuid: Forwarded to the network wrappers.
    """

    def _percent(part, total):
        # Percentage that is safe for an empty directory or an empty run.
        return 100.0 * part / total if total else 0.0

    nfo_path = self.ldtdir + self.datdir + '/info_data_nms.nfo'
    with open(nfo_path, 'w') as of:
        aevsize = self.netdict['aevsize']

        anicv = aat.anicrossvalidationconformer(self.netdict['cnstfile'],
                                                self.netdict['saefile'],
                                                self.netdict['nnfprefix'],
                                                self.netdict['num_nets'],
                                                gpuid, False)

        dc = aat.diverseconformers(
            self.netdict['cnstfile'], self.netdict['saefile'],
            self.netdict['nnfprefix'] + '0/networks/', aevsize, gpuid, False)

        Nkp = 0  # conformers written to disk
        Nkt = 0  # conformers above sig, all directories
        Ntt = 0  # conformers evaluated, all directories
        idx = 0  # running file counter used in the output file names

        for di, dirpath in enumerate(self.idir):
            of.write(
                str(di) + ' of ' + str(len(self.idir)) + ') dir: ' +
                str(dirpath) + '\n')

            files = os.listdir(dirpath)
            files.sort()

            Nk = 0
            Nt = 0
            for f in files:
                data = hdt.read_rcdb_coordsandnm(dirpath + f)

                spc = data["species"]
                xyz = data["coordinates"]
                nmc = data["nmdisplacements"]
                frc = data["forceconstant"]

                nms = nmt.nmsgenerator(xyz, nmc, frc, spc, T, minfc=5.0E-2)
                conformers = nms.get_Nrandom_structures(Ngen)

                ids = dc.get_divconfs_ids(conformers, spc, Ngen, Nkep, [])
                conformers = conformers[ids]

                sigma = anicv.compute_stddev_conformations(conformers, spc)
                sid = np.where(sigma > sig)[0]

                Nt += sigma.size
                Nk += sid.size

                # Only write a file when at least one conformer was kept.
                if sid.size > 0:
                    Nkp += sid.size
                    name_parts = f.split('.')[0].split('-')
                    cfn = (name_parts[0] + '_' + str(idx).zfill(5) + '-' +
                           name_parts[1] + '_2.xyz')
                    hdt.writexyzfile(self.cdir + cfn, conformers[sid], spc)
                idx += 1

            Nkt += Nk
            Ntt += Nt
            of.write(' -Total: ' + str(Nk) + ' of ' + str(Nt) +
                     ' percent: ' + "{:.2f}".format(_percent(Nk, Nt)) + '\n')
            of.flush()

        del anicv
        del dc

        of.write('\nGrand Total: ' + str(Nkt) + ' of ' + str(Ntt) +
                 ' percent: ' + "{:.2f}".format(_percent(Nkt, Ntt)) +
                 ' Kept ' + str(Nkp) + '\n')
def __fragmentbox__(self, file, sighat):
    """Build molecular clusters around each molecule and keep uncertain ones.

    For every molecule whose centroid lies inside the box by more than 4.5
    and by more than its own ``ctd[...][1]`` margin, neighbouring molecules
    are added to a cluster when their centroid is inside the box, the
    centroid distance is below ``di + dj + 5.0`` and the smallest atom-atom
    distance to the central molecule lies strictly between 0.70 and 4.5. At
    most a random number (2 to 14) of neighbours is added. The cluster is
    centred on its centroid and evaluated with every network in
    ``self.aens.ncl``; the energy standard deviation (kcal/mol) divided by
    ``sqrt(number of atoms in the cluster)`` is compared with ``sighat``.

    Results are stored on the instance: ``self.Nt`` counts evaluated
    clusters, ``self.Nd`` those above the threshold, ``self.maxsig`` the
    largest value seen above the threshold and ``self.frag_list`` the
    selected clusters. Each selected cluster is written to
    ``<file><i:04d>.xyz``.

    Args:
        file: Path prefix for the XYZ files that are written.
        sighat: Threshold on the per-sqrt-atom energy standard deviation.
    """
    self.X = self.mol.get_positions()

    self.frag_list = []

    self.Nd = 0
    self.Nt = 0

    self.maxsig = 0

    n_nets = len(self.aens.ncl)
    n_mols = len(self.Na)

    for i in range(n_mols):
        si = self.ctd[i][2]
        di = self.ctd[i][1]
        Nai = self.Na[i]
        Xi = self.X[si:si + Nai, :]
        Xci = np.sum(Xi, axis=0) / Nai

        # The central molecule must be away from the box boundary, both by
        # the fixed 4.5 margin and by its own margin di.
        inside_fixed = np.all(Xci > 4.5) and np.all(Xci <= self.L - 4.5)
        inside_own = np.all(Xci > di) and np.all(Xci < self.L - di)
        if not (inside_fixed and inside_own):
            continue

        Xf = Xi
        Sf = self.S[si:si + Nai]

        Nmax = random.randint(2, 14)
        Nmol = 0
        for j in range(n_mols):
            if i == j:
                continue

            sj = self.ctd[j][2]
            dj = self.ctd[j][1]
            Naj = self.Na[j]
            Xj = self.X[sj:sj + Naj, :]
            Xcj = np.sum(Xj, axis=0) / Naj

            if not (np.all(Xcj > dj) and np.all(Xcj < self.L - dj)):
                continue

            # Cheap centroid pre-filter before the all-pairs distance check.
            dc = np.linalg.norm(Xci - Xcj)
            if not dc < di + dj + 5.0:
                continue

            # Smallest atom-atom distance to the central molecule, computed
            # for all pairs at once and capped at 10.0.
            pair_d = np.linalg.norm(
                Xi[:, np.newaxis, :] - Xj[np.newaxis, :, :], axis=2)
            dmin = min(10.0, pair_d.min()) if pair_d.size else 10.0

            if dmin < 4.5 and dmin > 0.70:
                Xf = np.vstack([Xf, Xj])
                Sf.extend(self.S[sj:sj + Naj])
                Nmol += 1
                # The count only changes here, so this is the only place
                # where the neighbour limit can be reached.
                if Nmol >= Nmax:
                    break

        n_atoms = len(Sf)

        # Centre the cluster on its centroid.
        Xcf = np.sum(Xf, axis=0) / float(n_atoms)
        Xf = Xf - Xcf

        # One slot per network, so no uninitialised entry can leak into the
        # standard deviation when the ensemble size is not 5.
        E = np.empty(n_nets, dtype=np.float64)
        for k, nc in enumerate(self.aens.ncl):
            nc.setMolecule(coords=np.array(Xf, dtype=np.float32), types=Sf)
            E[k] = nc.energy()[0]

        # Normalise by the size of the whole cluster. The size of whichever
        # neighbour the loop visited last is unrelated to the cluster (and is
        # undefined when the box holds a single molecule).
        sig = np.std(hdn.hatokcal * E) / np.sqrt(n_atoms)

        self.Nt += 1
        if sig > sighat:
            if sig > self.maxsig:
                self.maxsig = sig
            self.Nd += 1
            hdn.writexyzfile(file + str(i).zfill(4) + '.xyz',
                             Xf.reshape(1, Xf.shape[0], 3), Sf)
            self.frag_list.append(dict({'coords': Xf, 'spec': Sf}))
if sigma > 0.1: Nb = Nb + 1 if key not in data.keys(): data[key] = (spc_l,[xyz_l]) else: #print(type()) p_xyz = data[key][1] p_xyz.append(xyz_l) data[key] = (spc_l, p_xyz) ''' print('H2O_frag' + str(i).zfill(3) + '.xyz : O=' + str(No).zfill(3) + ' : H=' + str(Nh).zfill(3) + ' : ' + "{:13.3f}".format(energies[0]) + ' ' + "{:13.3f}".format(energies[1]) + ' ' + "{:13.3f}".format(energies[2]) + ' ' + "{:13.3f}".format(energies[3]) + ' ' + "{:13.3f}".format(energies[4]) + ' ' + "{:.3f}".format(sigma)) ''' else: Ne = Ne + 1 print('Warning: fragmentation failed! No:',No,'Nh:',Nh) print('Bad Frags:', Nb, 'of',Nf,'Error:',Ne,'Estd:',Etot/float(Nf), ' Avg. Frc. Std.: ', Ftot/float(Nf)) dyn.run(40) # Do 100 steps of MD for key in data: xyz = np.array(data[key][1]) spc = data[key][0] print(key,xyz.shape[0]) hdt.writexyzfile(stdir + 'frag' + key + '.xyz', xyz, spc)