def generate_dhl_samples(self, MaxNa=25, fmax=0.005, fpref='dhl_scan-', freqname="vib."):
    # Torsion (dihedral) scan sampling over the stored molecule set
    ts = ani_tortion_scanner(self.ens, fmax)
    dhls = self.get_index_set(self.smiles, self.Nmol, MaxNa=MaxNa)

    for i, dhl in enumerate(dhls):
        mol = dhl[0]

        # Sample conformations along the selected dihedral indices
        X, S, p, e, s = ts.tortional_sampler(mol, self.Nsamp, dhl[1], 10.0, 36,
                                             sigma=self.sigma, rng=self.rng,
                                             freqname=freqname)

        if e.size > 0:
            # Comment line: SMILES string plus the scanned dihedral atom indices
            comment = Chem.MolToSmiles(dhl[0]) + '[' + ' '.join([str(j) for j in dhl[1]]) + ']'
            comment = [comment for _ in range(e.size * self.Nsamp)]
            hdt.writexyzfilewc(self.storedir + fpref + str(i).zfill(3) + '.xyz',
                               xyz=X, typ=S, cmt=comment)
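# Hypothetical usage sketch (the instance name `gen` and the argument values below
# are illustrative assumptions, not values taken from this repository):
#
#   gen.generate_dhl_samples(MaxNa=25, fmax=0.005,
#                            fpref='dhl_scan-', freqname='vib.')
#
# One xyz file per scanned molecule/dihedral set is written to gen.storedir, with
# the SMILES string and dihedral atom indices stored on the comment line.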
def mol_dyn_sampling(self, md_work, i, N, T1, T2, dt, Nc, Ns, sig, gpuid):
    # Active-learning MD sampler: run ensemble MD and harvest high-uncertainty frames
    activ = aat.moldynactivelearning(self.netdict['cnstfile'],
                                     self.netdict['saefile'],
                                     self.netdict['nnfprefix'],
                                     self.netdict['num_nets'],
                                     gpuid)

    difo = open(self.ldtdir + self.datdir + '/info_data_mdso-' + str(i) + '.nfo', 'w')

    dnfo = 'MD Sampler running: ' + str(md_work.size)
    difo.write(dnfo + '\n')
    Nmol = md_work.size

    ftme_t = 0.0
    for di, id in enumerate(md_work):
        data = hdt.read_rcdb_coordsandnm(id)

        S = data["species"]
        if "charge" in data and "multip" in data:
            chg = data["charge"]
            mlt = data["multip"]
        else:
            chg = "0"
            mlt = "1"

        # Set the molecule
        activ.setmol(data["coordinates"], S)

        # Generate conformations; sig is passed as dS, the ensemble-disagreement threshold
        X = activ.generate_conformations(N, T1, T2, dt, Nc, Ns, dS=sig)

        ftme_t += activ.failtime

        nfo = activ._infostr_
        m = id.rsplit('/', 1)[1].rsplit('.', 1)[0]
        difo.write('  -' + m + ': ' + nfo + '\n')
        difo.flush()

        if X.size > 0:
            hdt.writexyzfilewc(self.cdir + 'mds_' + m.split('.')[0] + '_' +
                               str(i).zfill(2) + str(di).zfill(4) + '.xyz',
                               X, S, ' ' + chg + ' ' + mlt)

    difo.write('Complete mean fail time: ' + "{:.2f}".format(ftme_t / float(Nmol)) + '\n')
    print(Nmol)
    del activ
    difo.close()
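# Hypothetical usage sketch (the instance name `gen`, the glob pattern, and the MD
# parameter values are illustrative assumptions):
#
#   md_work = np.array(sorted(glob.glob('/path/to/inputs/*.ipt')))
#   gen.mol_dyn_sampling(md_work, i=0, N=100, T1=300.0, T2=800.0,
#                        dt=0.25, Nc=1000, Ns=5, sig=0.08, gpuid=0)
#
# Frames harvested from the ensemble MD trajectories are written to gen.cdir with
# the charge and multiplicity on the comment line.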
def prepare_confs_iso(self):
    # Group conformations by elemental composition (isomers) and split them into xyz files
    prefix = 'ad'
    confs_dir = self.ldtdir + self.datdir + '/confs/'
    isoms_dir = self.ldtdir + self.datdir + '/confs_iso/'

    files = os.listdir(confs_dir)
    files = [f for f in files if f.rsplit('.', maxsplit=1)[-1] == 'xyz']

    ds = dict()
    of = open(self.ldtdir + self.datdir + '/info_confstoiso_map.dat', 'w')
    for i, f in enumerate(files):
        X, S, N, C = hdt.readxyz2(confs_dir + f)

        # Sort atoms by species so identical compositions map to the same key
        S = np.array(S)
        idx = sorted(range(len(S)), key=lambda k: S[k])
        S = S[np.array(idx)]
        for j, x in enumerate(X):
            X[j] = x[idx]

        id = "".join(S)
        if id in ds:
            sid = len(ds[id])
            of.write(f + ' ' + convert_eformula(S) + ' ' + str(sid) + ' ' + str(X.shape[0]) + '\n')
            ds[id].append((X, S, C))
        else:
            of.write(f + ' ' + convert_eformula(S) + ' ' + str(0) + ' ' + str(X.shape[0]) + '\n')
            ds.update({id: [(X, S, C)]})
    of.close()

    Nt = 0
    for i in ds.keys():
        # Stack all conformations that share this composition
        X = []
        S = []
        C = []
        for j in ds[i]:
            X.append(j[0])
            S.append(j[1])
            C.extend(j[2])
        X = np.vstack(X)
        S = list(S[0])

        N = X.shape[0]
        Nt += N
        if N < 40:
            # Small groups go to a single file
            fn = prefix + '_' + convert_eformula(S) + '-' + str(N).zfill(3) + '.xyz'
            hdt.writexyzfilewc(isoms_dir + '/' + fn, X, S, C)
        else:
            # Large groups are split into chunks of at most 40 conformations
            Nsplit = int(math.ceil(N / float(40)))
            X = np.array_split(X, Nsplit)
            C = np.array_split(np.array(C), Nsplit)
            for l, (x, c) in enumerate(zip(X, C)):
                fn = prefix + '_' + convert_eformula(S) + '_' + str(l).zfill(2) + \
                     '-' + str(x.shape[0]).zfill(3) + '.xyz'
                hdt.writexyzfilewc(isoms_dir + '/' + fn, x, S, c)
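# Hypothetical usage sketch (assumes the confs/ and confs_iso/ directories already
# exist under gen.ldtdir + gen.datdir; the instance name `gen` is an assumption):
#
#   gen.prepare_confs_iso()
#
# Conformations from confs/*.xyz are grouped by elemental composition and written
# to confs_iso/ in files of at most 40 structures each.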
def normal_mode_sampling(self, T, Ngen, Nkep, maxd, sig, gpuid):
    of = open(self.ldtdir + self.datdir + '/info_data_nms.nfo', 'w')

    aevsize = self.netdict['aevsize']

    # Ensemble (cross-validation) model for query-by-committee selection
    anicv = aat.anicrossvalidationconformer(self.netdict['cnstfile'],
                                            self.netdict['saefile'],
                                            self.netdict['nnfprefix'],
                                            self.netdict['num_nets'],
                                            [gpuid], False)

    # Diversity selector built on a single network's AEVs
    dc = aat.diverseconformers(self.netdict['cnstfile'],
                               self.netdict['saefile'],
                               self.netdict['nnfprefix'] + '0/networks/',
                               aevsize, gpuid, False)

    Nkp = 0
    Nkt = 0
    Ntt = 0
    idx = 0
    for di, id in enumerate(self.idir):
        of.write(str(di) + ' of ' + str(len(self.idir)) + ') dir: ' + str(id) + '\n')

        files = os.listdir(id)
        files.sort()

        Nk = 0
        Nt = 0
        for fi, f in enumerate(files):
            print(f)
            data = hdt.read_rcdb_coordsandnm(id + f)

            spc = data["species"]
            xyz = data["coordinates"]
            nmc = data["nmdisplacements"]
            frc = data["forceconstant"]
            if "charge" in data and "multip" in data:
                chg = data["charge"]
                mlt = data["multip"]
            else:
                chg = "0"
                mlt = "1"

            # Normal-mode sampler: random displacements along normal modes at temperature T
            nms = nmt.nmsgenerator(xyz, nmc, frc, spc, T, minfc=5.0E-2, maxd=maxd)
            conformers = nms.get_Nrandom_structures(Ngen)

            if conformers.shape[0] > 0:
                if conformers.shape[0] > Nkep:
                    # Keep only the Nkep most diverse structures
                    ids = dc.get_divconfs_ids(conformers, spc, Ngen, Nkep, [])
                    conformers = conformers[ids]

                # Query by committee: keep structures with high ensemble disagreement
                sigma = anicv.compute_stddev_conformations(conformers, spc)
                sid = np.where(sigma > sig)[0]

                Nt += sigma.size
                Nk += sid.size
                if sid.size > 0:
                    Nkp += sid.size
                    cfn = (f.split('.')[0].split('-')[0] + '_' + str(idx).zfill(5) +
                           '-' + f.split('.')[0].split('-')[1] + '_2.xyz')
                    # One comment (charge/multiplicity) per written conformer
                    cmts = [' ' + chg + ' ' + mlt for c in range(sid.size)]
                    hdt.writexyzfilewc(self.cdir + cfn, conformers[sid], spc, cmts)
            idx += 1

        Nkt += Nk
        Ntt += Nt
        of.write('    -Total: ' + str(Nk) + ' of ' + str(Nt) +
                 ' percent: ' + "{:.2f}".format(100.0 * Nk / Nt) + '\n')
        of.flush()

    del anicv
    del dc

    of.write('\nGrand Total: ' + str(Nkt) + ' of ' + str(Ntt) +
             ' percent: ' + "{:.2f}".format(100.0 * Nkt / Ntt) +
             ' Kept ' + str(Nkp) + '\n')
    of.close()
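# Hypothetical usage sketch (argument values are illustrative assumptions):
#
#   gen.normal_mode_sampling(T=800.0, Ngen=200, Nkep=40,
#                            maxd=0.25, sig=0.08, gpuid=0)
#
# Ngen structures are generated per molecule, at most Nkep diverse ones are kept,
# and only those whose ensemble standard deviation exceeds sig are written out.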
def structural_sampling(self, N, sig, gpuid):
    # Query-by-committee filtering of user-supplied multi-conformer xyz files
    of = open(self.ldtdir + self.datdir + '/info_data_strucs.nfo', 'w')

    aevsize = self.netdict['aevsize']

    anicv = aat.anicrossvalidationconformer(self.netdict['cnstfile'],
                                            self.netdict['saefile'],
                                            self.netdict['nnfprefix'],
                                            self.netdict['num_nets'],
                                            [gpuid], False)

    dc = aat.diverseconformers(self.netdict['cnstfile'],
                               self.netdict['saefile'],
                               self.netdict['nnfprefix'] + '0/networks/',
                               aevsize, gpuid, False)

    files = os.listdir(self.strucsfolder)
    files.sort()

    Nkt = 0
    Ntt = 0
    cnt = 0
    for fi, f in enumerate(files):
        print(f)
        fil = open(self.strucsfolder + f, 'r')
        lines = fil.readlines()
        fil.close()

        # Read all conformations; each block is: atom count, comment line, then nat coordinate lines
        nat = int(lines[0])
        nconfs = int(round(len(lines) / (nat + 2)))
        crds = []
        for conf in range(nconfs):
            crd = []
            if conf == 0:
                # The first comment line must carry charge and multiplicity
                if not re.search("Charge:", lines[1]) or not re.search("Mul:", lines[1]):
                    raise ValueError('Error: the first comment line in %s must have charge and multiplicity. '
                                     'Please add something like " Charge: 0 Mul: 1 "' % (self.strucsfolder + f))
                chg = lines[1].split("Charge:")[1].split()[0]
                mul = lines[1].split("Mul:")[1].split()[0]
                spc = []
                for i in range(nat):
                    var = lines[conf * (nat + 2) + 2 + i].split()
                    spc.append(var[0])
                    crd.append([float(var[1]), float(var[2]), float(var[3])])
            else:
                for i in range(nat):
                    var = lines[conf * (nat + 2) + 2 + i].split()
                    crd.append([float(var[1]), float(var[2]), float(var[3])])
            crds.append(crd)

        # Select up to N random structures, if needed
        if nconfs > N:
            picks = np.random.choice(nconfs, size=N, replace=False)
            crds = [crds[i] for i in sorted(picks)]

        # Convert the list to a numpy array
        crds = np.asarray(crds, dtype=np.float32)

        # Filter by QBC (ensemble standard deviation)
        sigma = anicv.compute_stddev_conformations(crds, spc)
        sid = np.where(sigma > sig)[0]
        Ntt += sigma.size
        Nkt += sid.size

        of.write(str(cnt + 1) + ' of ' + str(len(files)) + ') file: ' + str(self.strucsfolder + f) + '\n')
        of.write('    -Total: ' + str(sid.size) + ' of ' + str(sigma.size) +
                 ' percent: ' + "{:.2f}".format(100.0 * sid.size / sigma.size) + '\n')
        of.flush()

        if sid.size > 0:
            cfn = f.split('.')[0] + '_strucs.xyz'
            cmts = [' ' + chg + ' ' + mul for c in range(sid.size)]
            hdt.writexyzfilewc(self.cdir + cfn, crds[sid], spc, cmts)
        cnt += 1

    of.write('\nGrand Total: ' + str(Nkt) + ' of ' + str(Ntt) +
             ' percent: ' + "{:.2f}".format(100.0 * Nkt / Ntt) + '\n')
    of.close()
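# Hypothetical usage sketch (argument values are illustrative assumptions; input
# files are the multi-conformer xyz files found in gen.strucsfolder):
#
#   gen.structural_sampling(N=500, sig=0.08, gpuid=0)
#
# At most N conformations are randomly kept per file, and only those whose
# ensemble standard deviation exceeds sig are written to gen.cdir.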
def __fragmentbox__(self, file, sighat):
    self.X = self.mol.get_positions()

    self.frag_list = []

    self.Nd = 0
    self.Nt = 0

    self.maxsig = 0

    CUT = 7.0  # distance cutoff (Angstrom) for including neighboring molecules

    for i in range(len(self.Na)):
        si = self.ctd[i][2]
        di = self.ctd[i][1]
        Nai = self.Na[i]

        Xi = self.X[si:si + Nai, :]
        Xci = np.sum(Xi, axis=0) / Nai  # geometric center of molecule i

        # Only seed fragments from molecules well inside the box
        if np.all(Xci > CUT + di) and np.all(Xci <= self.L - (CUT + di)):
            Xf = Xi
            Sf = self.S[si:si + Nai]

            Nmax = np.random.randint(100, 101)  # currently always 100
            Nmol = 0
            ridx = np.arange(len(self.Na))
            np.random.shuffle(ridx)
            for j in ridx:
                if i != j:
                    sj = self.ctd[j][2]
                    dj = self.ctd[j][1]
                    Naj = self.Na[j]

                    Xcj = np.sum(self.X[sj:sj + Naj, :], axis=0) / Naj
                    Xj = self.X[sj:sj + Naj, :]

                    if np.all(Xcj > dj) and np.all(Xcj < self.L - dj):
                        dc = np.linalg.norm(Xci - Xcj)
                        if dc < di + dj + CUT:
                            # Minimum interatomic distance between molecules i and j
                            minl = 10.0
                            for ii in range(Nai):
                                Xiii = Xi[ii]
                                for jj in range(Naj):
                                    Xjjj = Xj[jj]
                                    v = np.linalg.norm(Xiii - Xjjj)
                                    if v < minl:
                                        minl = v

                            # Add molecule j if it is close to, but not overlapping, the fragment seed
                            if minl < CUT and minl > 0.5:
                                Xf = np.vstack([Xf, Xj])
                                Sf.extend(self.S[sj:sj + Naj])
                                Nmol += 1
                                if Nmol >= Nmax:
                                    break

            Xcf = np.sum(Xf, axis=0) / float(len(Sf))

            # Ensemble energies of the assembled fragment
            E = np.empty(len(self.aens.ncl), dtype=np.float64)
            for idx, nc in enumerate(self.aens.ncl):
                nc.setMolecule(coords=np.array(Xf, dtype=np.float32), types=Sf)
                E[idx] = nc.energy()[0]

            # Ensemble spread (kcal/mol); note the normalization uses Nai + Naj
            # (seed molecule plus the last examined neighbor)
            sig = np.std(hdn.hatokcal * E) / np.sqrt(Nai + Naj)

            self.Nt += 1
            if sig > sighat:
                if sig > self.maxsig:
                    self.maxsig = sig
                self.Nd += 1
                hdn.writexyzfilewc(file + str(i).zfill(4) + '.xyz',
                                   Xf.reshape(1, Xf.shape[0], 3), Sf,
                                   cmt=['sigma:' + str(sig)])
                self.frag_list.append(dict({'coords': Xf, 'spec': Sf}))
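# Hypothetical usage sketch (this is a private helper; the file prefix and the
# sighat value below are illustrative assumptions):
#
#   self.__fragmentbox__('/path/to/fragments/frag-', sighat=0.35)
#
# Fragments whose normalized ensemble standard deviation exceeds sighat are
# written to xyz files and collected in self.frag_list.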