def cluster_sampling(self, tid, Nr, mol_sizes, mol_temps, seed, gcmddict, gpuid):
    """Generate molecular clusters with a ``pmf.clustergenerator``.

    Args:
        tid: Forwarded unchanged to ``generate_clusters`` (presumably a
            task/thread id -- confirm against the caller).
        Nr: Stored under the ``'Nr'`` key of the options passed on.
        mol_sizes: Forwarded unchanged to ``generate_clusters``.
        mol_temps: Forwarded unchanged to ``generate_clusters``.
        seed: Seed applied to NumPy's global random number generator.
        gcmddict: Options dictionary; must contain ``'solv_file'`` and
            ``'solu_dirs'``.  Only a shallow copy of it is modified.
        gpuid: Forwarded to the ``pmf.clustergenerator`` constructor.

    Side effects:
        Sets ``OMP_NUM_THREADS`` to ``"2"`` in the process environment and
        reseeds NumPy's global RNG.
    """
    os.environ["OMP_NUM_THREADS"] = "2"

    # Work on a copy so the caller's dictionary is not modified.
    dictc = gcmddict.copy()
    solv_file = dictc['solv_file']
    solu_dirs = dictc['solu_dirs']

    np.random.seed(seed)

    dictc['Nr'] = Nr
    dictc['molfile'] = self.cdir + 'clst'
    dictc['dstore'] = self.ldtdir + self.datdir + '/'

    solv = [hdt.read_rcdb_coordsandnm(solv_file)]
    if solu_dirs:
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # solute list identical between runs that use the same seed.
        # os.path.join also works when solu_dirs has no trailing separator.
        solu = [
            hdt.read_rcdb_coordsandnm(os.path.join(solu_dirs, f))
            for f in sorted(os.listdir(solu_dirs))
        ]
    else:
        solu = []

    dgen = pmf.clustergenerator(self.netdict['cnstfile'],
                                self.netdict['saefile'],
                                self.netdict['nnfprefix'],
                                self.netdict['num_nets'],
                                solv, solu, gpuid)
    dgen.generate_clusters(dictc, mol_sizes, mol_temps, tid)
def mol_dyn_sampling(self, md_work, i, N, T1, T2, dt, Nc, Ns, sig, gpuid):
    """Sample conformations for a batch of molecule files.

    Each path in ``md_work`` is loaded with ``hdt.read_rcdb_coordsandnm``
    and handed to an ``aat.moldynactivelearning`` sampler.  Any
    conformations it returns are written to an xyz file under ``self.cdir``.
    Progress is logged to ``info_data_mdso-<i>.nfo`` under
    ``self.ldtdir + self.datdir``.

    Args:
        md_work: Iterable of molecule file paths that exposes a ``size``
            attribute (presumably a NumPy array of strings -- confirm).
        i: Index used in the log file name and, zero-padded, in the output
            xyz file names.
        N, T1, T2, dt, Nc, Ns: Forwarded positionally to
            ``generate_conformations``.
        sig: Forwarded to ``generate_conformations`` as ``dS``.
        gpuid: Forwarded to the sampler constructor.
    """
    activ = aat.moldynactivelearning(self.netdict['cnstfile'],
                                     self.netdict['saefile'],
                                     self.netdict['nnfprefix'],
                                     self.netdict['num_nets'],
                                     gpuid)

    Nmol = md_work.size
    info_path = self.ldtdir + self.datdir + '/info_data_mdso-' + str(i) + '.nfo'

    # The context manager closes the log even if sampling raises.
    with open(info_path, 'w') as difo:
        difo.write('MD Sampler running: ' + str(Nmol) + '\n')

        ftme_t = 0.0
        for di, mol_path in enumerate(md_work):
            data = hdt.read_rcdb_coordsandnm(mol_path)
            S = data["species"]

            # Fall back to a neutral singlet when charge/multiplicity are absent.
            if "charge" in data and "multip" in data:
                chg = data["charge"]
                mlt = data["multip"]
            else:
                chg = "0"
                mlt = "1"

            # Set mols
            activ.setmol(data["coordinates"], S)

            # Generate conformations
            X = activ.generate_conformations(N, T1, T2, dt, Nc, Ns, dS=sig)

            ftme_t += activ.failtime

            nfo = activ._infostr_
            # [-1] instead of [1]: a bare file name without '/' no longer
            # raises IndexError.
            m = mol_path.rsplit('/', 1)[-1].rsplit('.', 1)[0]
            difo.write(' -' + m + ': ' + nfo + '\n')
            difo.flush()

            if X.size > 0:
                # str() is a no-op for string values and also accepts
                # numeric charge/multiplicity entries.
                hdt.writexyzfilewc(
                    self.cdir + 'mds_' + m.split('.')[0] + '_' +
                    str(i).zfill(2) + str(di).zfill(4) + '.xyz',
                    X, S, ' ' + str(chg) + ' ' + str(mlt))

        # An empty work list would otherwise divide by zero.
        mean_fail = ftme_t / float(Nmol) if Nmol > 0 else 0.0
        difo.write('Complete mean fail time: ' +
                   "{:.2f}".format(mean_fail) + '\n')

    print(Nmol)
    del activ
# Collect the molecules handed to the dimer generator and log progress to
# 'info_data_mddimer.nfo' under dstore.
mols = []
difo = open(dstore + 'info_data_mddimer.nfo', 'w')
for di, id in enumerate(idir):
    # Each idir entry is indexed as id[0] (used as a repeat count below) and
    # id[1] (a directory path that is listed and prefixed to file names).
    files = os.listdir(id[1])
    random.shuffle(files)
    dnfo = str(di) + ' of ' + str(
        len(idir)) + ') dir: ' + str(id) + ' Selecting: ' + str(
            id[0] * len(files))
    print(dnfo)
    difo.write(dnfo + '\n')
    # Every file is read id[0] times, so each molecule is appended to mols
    # id[0] times.
    for i in range(id[0]):
        for n, m in enumerate(files):
            data = hdn.read_rcdb_coordsandnm(id[1] + m)
            mols.append(data)

dgen = pmf.dimergenerator(cnstfilecv, saefilecv, nnfprefix, 5, mols)
difo.write('Beginning dimer generation...\n')

# Running totals; Nd is initialised here but not updated in the visible part
# of the loop.
Nt = 0
Nd = 0
for i in range(Nr):
    # One dynamics run per iteration; output files are prefixed with the
    # zero-padded iteration index.
    dgen.init_dynamics(Nm, V, L, dt, T)
    dgen.run_dynamics(Ni, xyzfile, trajfile)
    dgen.__fragmentbox__(molfile + str(i).zfill(4) + '_')
    Nt += dgen.Nt
# Set required files for pyNeuroChem anipath = '/home/jujuman/Dropbox/ChemSciencePaper.AER/networks/ANI-c08f-ntwk/' cnstfile = anipath + '/rHCNO-4.6A_16-3.1A_a4-8.params' saefile = anipath + '/sae_6-31gd.dat' nnfdir = anipath + '/networks/' idir = '/home/jujuman/Scratch/Research/GDB-11-wB97X-6-31gd/dnntsgdb11_07/inputs/' sdir = '/home/jujuman/Scratch/Research/GDB-11-wB97X-6-31gd/dnntsgdb11_07/inputs_new/' # Construct pyNeuroChem class nc = pync.molecule(cnstfile, saefile, nnfdir, 0) files = [f for f in os.listdir(idir) if f.split(".")[1] == "ipt"] for i, f in enumerate(files): data = hdn.read_rcdb_coordsandnm(idir + f) X = data['coordinates'] S = data['species'] mol = Atoms(positions=X, symbols=S) mol.set_calculator(ANI(False)) mol.calc.setnc(nc) dyn = LBFGS(mol, logfile='optimization.log') dyn.run(fmax=0.00001, steps=1000) X = mol.get_positions() Nc = int(f.split(".")[0].split("-")[1]) Fp = f.split("-")[0]
def dimer_sampling(self, tid, Nr, dparam, gpuid):
    """Generate dimers from MD boxes built out of selected molecules.

    Molecules are read from the directories selected in
    ``dparam['mdselect']``, filtered by atom count, and given to a
    ``pmf.dimergenerator``.  For each of ``Nr`` runs the dynamics are
    initialised and the box is fragmented up to ``Ns`` times.  A run stops
    early once the returned maximum sigma exceeds ``3.0 * sig``.  Progress
    is logged to ``info_data_mddimer-<tid>.nfo`` under
    ``self.ldtdir + self.datdir``.

    Args:
        tid: Identifier used in the log file name and output file prefixes.
        Nr: Number of independent dynamics runs.
        dparam: Parameter dictionary with the keys ``'mdselect'``, ``'T'``,
            ``'L'``, ``'V'``, ``'maxNa'``, ``'dt'``, ``'sig'``, ``'Nm'``,
            ``'Ni'`` and ``'Ns'``.  Each ``'mdselect'`` entry is indexed as
            ``entry[0]`` (repeat count) and ``entry[1]`` (index into
            ``self.idir``).
        gpuid: Forwarded to the ``pmf.dimergenerator`` constructor.
    """
    mds_select = dparam['mdselect']
    T = dparam['T']
    L = dparam['L']
    V = dparam['V']
    maxNa = dparam['maxNa']
    dt = dparam['dt']
    sig = dparam['sig']
    Nm = dparam['Nm']
    Ni = dparam['Ni']
    Ns = dparam['Ns']

    mols = []
    info_path = (self.ldtdir + self.datdir + '/info_data_mddimer-' +
                 str(tid) + '.nfo')

    # The context manager closes the log even if generation raises.
    with open(info_path, 'w') as difo:
        for di, entry in enumerate(mds_select):
            src_dir = self.idir[entry[1]]
            files = os.listdir(src_dir)
            random.shuffle(files)

            dnfo = (str(di) + ' of ' + str(len(mds_select)) + ') dir: ' +
                    str(src_dir) + ' Selecting: ' +
                    str(entry[0] * len(files)))
            difo.write(dnfo + '\n')

            # Files are re-read on every repeat so each appended entry is a
            # separate object, exactly as before.
            for _ in range(entry[0]):
                for m in files:
                    data = hdt.read_rcdb_coordsandnm(src_dir + m)
                    if len(data['species']) < maxNa:
                        mols.append(data)

        dgen = pmf.dimergenerator(self.netdict['cnstfile'],
                                  self.netdict['saefile'],
                                  self.netdict['nnfprefix'],
                                  self.netdict['num_nets'],
                                  mols, gpuid)

        difo.write('Beginning dimer generation...\n')

        Nt = 0
        Nd = 0
        for i in range(Nr):
            dgen.init_dynamics(Nm, V, L, dt, T)

            for j in range(Ns):
                # The initial box is fragmented before any dynamics are run.
                if j != 0:
                    dgen.run_dynamics(Ni)

                fname = (self.cdir + 'dimer-' + str(tid).zfill(2) +
                         str(i).zfill(2) + '-' + str(j).zfill(2) + '_')
                max_sig = dgen.__fragmentbox__(fname, sig)
                print('MaxSig:', max_sig)

                difo.write('Step (' + str(i) + ',' + str(j) + ') [' +
                           str(dgen.Nd) + '/' + str(dgen.Nt) +
                           '] max sigma: ' + "{:.2f}".format(max_sig) +
                           ' generated ' + str(len(dgen.frag_list)) +
                           ' dimers...\n')
                Nt += dgen.Nt
                Nd += dgen.Nd

                if max_sig > 3.0 * sig:
                    difo.write('Terminating dynamics -- max sigma: ' +
                               "{:.2f}".format(max_sig) + ' Ran for: ' +
                               "{:.2f}".format(j * Ni * dt) + 'fs\n')
                    break

        # Nt stays 0 when Nr or Ns is 0 or no dimers were tested; report 0%
        # instead of raising ZeroDivisionError after all the work is done.
        percent = 100.0 * Nd / float(Nt) if Nt else 0.0
        difo.write('Generated ' + str(Nd) + ' of ' + str(Nt) +
                   ' tested dimers. Percent: ' + "{:.2f}".format(percent))
def normal_mode_sampling(self, T, Ngen, Nkep, maxd, sig, gpuid):
    """Sample conformers along normal modes and keep the uncertain ones.

    For every file in every directory of ``self.idir``, ``Ngen`` random
    structures are requested from an ``nmt.nmsgenerator``.  If more than
    ``Nkep`` come back, they are reduced with
    ``aat.diverseconformers.get_divconfs_ids``.  The survivors are scored
    with ``compute_stddev_conformations``, and conformers whose value
    exceeds ``sig`` are written to an xyz file under ``self.cdir``.
    Statistics are logged to ``info_data_nms.nfo`` under
    ``self.ldtdir + self.datdir``.

    Args:
        T: Forwarded to ``nmt.nmsgenerator``.
        Ngen: Number of random structures requested per input file.
        Nkep: Threshold above which the conformer set is reduced; also
            forwarded to ``get_divconfs_ids``.
        maxd: Forwarded to ``nmt.nmsgenerator`` as ``maxd``.
        sig: Threshold applied to the per-conformer standard deviation.
        gpuid: Forwarded to both helper constructors.
    """
    info_path = self.ldtdir + self.datdir + '/info_data_nms.nfo'

    # The context manager closes the log even if sampling raises.
    with open(info_path, 'w') as of:
        aevsize = self.netdict['aevsize']

        anicv = aat.anicrossvalidationconformer(self.netdict['cnstfile'],
                                                self.netdict['saefile'],
                                                self.netdict['nnfprefix'],
                                                self.netdict['num_nets'],
                                                [gpuid], False)

        dc = aat.diverseconformers(self.netdict['cnstfile'],
                                   self.netdict['saefile'],
                                   self.netdict['nnfprefix'] + '0/networks/',
                                   aevsize, gpuid, False)

        Nkp = 0  # conformers actually written
        Nkt = 0  # conformers above threshold, all directories
        Ntt = 0  # conformers evaluated, all directories
        idx = 0  # running file counter used in output names
        for di, src_dir in enumerate(self.idir):
            of.write(str(di) + ' of ' + str(len(self.idir)) + ') dir: ' +
                     str(src_dir) + '\n')

            files = os.listdir(src_dir)
            files.sort()

            Nk = 0
            Nt = 0
            for f in files:
                print(f)
                data = hdt.read_rcdb_coordsandnm(src_dir + f)

                spc = data["species"]
                xyz = data["coordinates"]
                nmc = data["nmdisplacements"]
                frc = data["forceconstant"]

                # Fall back to a neutral singlet when charge/multiplicity
                # are absent.
                if "charge" in data and "multip" in data:
                    chg = data["charge"]
                    mlt = data["multip"]
                else:
                    chg = "0"
                    mlt = "1"

                nms = nmt.nmsgenerator(xyz, nmc, frc, spc, T,
                                       minfc=5.0E-2, maxd=maxd)
                conformers = nms.get_Nrandom_structures(Ngen)

                if conformers.shape[0] > 0:
                    if conformers.shape[0] > Nkep:
                        ids = dc.get_divconfs_ids(conformers, spc,
                                                  Ngen, Nkep, [])
                        conformers = conformers[ids]

                    sigma = anicv.compute_stddev_conformations(conformers,
                                                               spc)
                    sid = np.where(sigma > sig)[0]

                    Nt += sigma.size
                    Nk += sid.size

                    # Same condition as "100 * sid.size / Ngen > 0" for any
                    # positive Ngen, without the division.
                    if sid.size > 0:
                        Nkp += sid.size
                        stem = f.split('.')[0]
                        cfn = (stem.split('-')[0] + '_' +
                               str(idx).zfill(5) + '-' +
                               stem.split('-')[1] + '_2.xyz')
                        # One comment per conformer written.  The running
                        # directory total Nk was used here before, which
                        # produced surplus entries.  str() also accepts
                        # numeric charge/multiplicity values.
                        cmts = [' ' + str(chg) + ' ' + str(mlt)
                                for _ in range(sid.size)]
                        hdt.writexyzfilewc(self.cdir + cfn,
                                           conformers[sid], spc, cmts)
                idx += 1

            Nkt += Nk
            Ntt += Nt

            # A directory that yields no conformers must not abort the run
            # with ZeroDivisionError.
            dir_percent = 100.0 * Nk / Nt if Nt else 0.0
            of.write('    -Total: ' + str(Nk) + ' of ' + str(Nt) +
                     ' percent: ' + "{:.2f}".format(dir_percent) + '\n')
            of.flush()

        del anicv
        del dc

        total_percent = 100.0 * Nkt / Ntt if Ntt else 0.0
        of.write('\nGrand Total: ' + str(Nkt) + ' of ' + str(Ntt) +
                 ' percent: ' + "{:.2f}".format(total_percent) +
                 ' Kept ' + str(Nkp) + '\n')
gcmddict = {'edgepad': 0.8, 'mindist': 1.6, 'maxsig' : 0.7, 'Nr': Nr, 'Nm': Nm, 'Ni': Ni, 'Ns': Ns, 'dt': dt, 'V': V, 'L': L, 'T': T, 'Nembed' : 0, 'molfile' : molfile, 'dstore' : dstore, } #------------------------------------------- #print(solu) solv = [hdn.read_rcdb_coordsandnm(solv_file)] #solu = [hdn.read_rcdb_coordsandnm(solu_dirs+f) for f in os.listdir(solu_dirs)] solu = [] dgen = pmf.clustergenerator(cnstfilecv, saefilecv, nnfprefix, 5, solv, solu) dgen.generate_clusters(gcmddict,0)