#P = 0.25 atmlist = [] idir = '/home/jujuman/Research/extensibility_test_sets/COMP6v2/TripeptideS/inputs/' cdir = '/home/jujuman/Research/extensibility_test_sets/COMP6v2/TripeptideS/confs/' #idir = '/home/jujuman/Research/GDB-11-wB97X-6-31gd/dnnts_h2ocluster/h2o_cluster/inputs/' #cdir = '/home/jujuman/Research/GDB-11-wB97X-6-31gd/dnnts_h2ocluster/h2o_cluster/confs/' files = os.listdir(idir) files.sort() if not os.path.exists(cdir): os.mkdir(cdir) dc = aat.diverseconformers(cnstfile, saefile, nnfdir, aevsize, 0, False) for fi, f in enumerate(files): print(fi, 'of', len(files), ') Working on:', f) data = hdn.read_rcdb_coordsandnm(idir + f) spc = data["species"] xyz = data["coordinates"] nmc = data["nmdisplacements"] frc = data["forceconstant"] if set(['P', 'B', 'Br']).isdisjoint(set(spc)): #Ngen = K*frc.size #Nkep = int(Ngen*P) Ngen = K * 8
def normal_mode_sampling(self, T, Ngen, Nkep, maxd, sig, gpuid):
    """Sample conformers along normal modes and keep the high-disagreement ones.

    For every file in every directory listed in ``self.idir`` this reads
    coordinates and normal-mode data, generates ``Ngen`` random structures
    with ``nmt.nmsgenerator``, optionally reduces them to ``Nkep`` with
    ``dc.get_divconfs_ids``, and writes the structures whose ensemble
    standard deviation exceeds ``sig`` to an xyz file under ``self.cdir``.
    A per-directory and a grand-total summary are written to
    ``<ldtdir><datdir>/info_data_nms.nfo``.

    Args:
        T: Forwarded to ``nmt.nmsgenerator`` (presumably the sampling
            temperature; confirm against that class).
        Ngen: Number of structures requested per input file.
        Nkep: When more than this many structures are generated, only the
            ``Nkep`` selected by ``dc.get_divconfs_ids`` are evaluated.
        maxd: Forwarded to ``nmt.nmsgenerator`` as ``maxd``.
        sig: Threshold; structures with ``sigma > sig`` are kept.
        gpuid: Device id forwarded to both model wrappers.

    Each directory in ``self.idir`` and ``self.cdir`` are joined to file
    names by plain concatenation, so they must end with a path separator.
    Input file names must look like ``<a>-<b>.<ext>``; otherwise building
    the output name raises ``IndexError``.
    """

    def percent(part, whole):
        # An empty directory (or a run that generated nothing) reports 0.00
        # instead of crashing with ZeroDivisionError.
        return 100.0 * part / whole if whole else 0.0

    # The context manager guarantees the report is closed even if sampling
    # or model evaluation raises part-way through.
    with open(self.ldtdir + self.datdir + '/info_data_nms.nfo', 'w') as of:
        aevsize = self.netdict['aevsize']

        anicv = aat.anicrossvalidationconformer(self.netdict['cnstfile'],
                                                self.netdict['saefile'],
                                                self.netdict['nnfprefix'],
                                                self.netdict['num_nets'],
                                                [gpuid], False)

        dc = aat.diverseconformers(self.netdict['cnstfile'],
                                   self.netdict['saefile'],
                                   self.netdict['nnfprefix'] + '0/networks/',
                                   aevsize, gpuid, False)

        Nkp = 0  # structures written to disk (grand total)
        Nkt = 0  # structures above threshold (grand total)
        Ntt = 0  # structures evaluated (grand total)
        idx = 0  # running file counter used in output file names

        for di, dirname in enumerate(self.idir):
            of.write('{} of {}) dir: {}\n'.format(di, len(self.idir), dirname))

            Nk = 0  # above threshold, this directory
            Nt = 0  # evaluated, this directory
            for f in sorted(os.listdir(dirname)):
                print(f)
                data = hdt.read_rcdb_coordsandnm(dirname + f)

                spc = data["species"]
                xyz = data["coordinates"]
                nmc = data["nmdisplacements"]
                frc = data["forceconstant"]

                # Charge/multiplicity default to a neutral singlet when the
                # input does not provide both.
                if "charge" in data and "multip" in data:
                    chg = data["charge"]
                    mlt = data["multip"]
                else:
                    chg = "0"
                    mlt = "1"

                nms = nmt.nmsgenerator(xyz, nmc, frc, spc, T,
                                       minfc=5.0E-2, maxd=maxd)
                conformers = nms.get_Nrandom_structures(Ngen)

                if conformers.shape[0] > 0:
                    if conformers.shape[0] > Nkep:
                        ids = dc.get_divconfs_ids(conformers, spc, Ngen, Nkep, [])
                        conformers = conformers[ids]

                    sigma = anicv.compute_stddev_conformations(conformers, spc)
                    sid = np.where(sigma > sig)[0]

                    Nt += sigma.size
                    Nk += sid.size

                    # Same condition as the original
                    # ``100.0 * sid.size / float(Ngen) > 0`` for any Ngen > 0.
                    if sid.size > 0:
                        Nkp += sid.size
                        name_parts = f.split('.')[0].split('-')
                        cfn = (name_parts[0] + '_' + str(idx).zfill(5) + '-'
                               + name_parts[1] + '_2.xyz')
                        # One comment per written structure (previously sized
                        # by the running directory total Nk).
                        cmts = [' ' + chg + ' ' + mlt for _ in range(sid.size)]
                        hdt.writexyzfilewc(self.cdir + cfn, conformers[sid],
                                           spc, cmts)

                # NOTE(review): counter advanced once per input file; the
                # original nesting level is ambiguous in the collapsed source.
                idx += 1

            Nkt += Nk
            Ntt += Nt
            of.write(' -Total: ' + str(Nk) + ' of ' + str(Nt) + ' percent: '
                     + "{:.2f}".format(percent(Nk, Nt)) + '\n')
            of.flush()

        # Drop the model wrappers before the final write, as the original did.
        del anicv
        del dc

        of.write('\nGrand Total: ' + str(Nkt) + ' of ' + str(Ntt) + ' percent: '
                 + "{:.2f}".format(percent(Nkt, Ntt)) + ' Kept ' + str(Nkp) + '\n')
def structural_sampling(self, N, sig, gpuid):
    """Filter user-supplied structures by ensemble disagreement.

    Every file in ``self.strucsfolder`` is read as a multi-frame xyz file
    (atom-count line, comment line, then one ``symbol x y z`` line per
    atom, repeated per frame). At most ``N`` randomly chosen frames per
    file are evaluated with ``anicv.compute_stddev_conformations``; frames
    with ``sigma > sig`` are written to ``<cdir><stem>_strucs.xyz``. A
    per-file and a grand-total summary are written to
    ``<ldtdir><datdir>/info_data_strucs.nfo``.

    Args:
        N: Maximum number of frames evaluated per file.
        sig: Threshold; frames with ``sigma > sig`` are kept.
        gpuid: Device id forwarded to the model wrapper.

    Raises:
        ValueError: If a file contains no complete frame, or if its first
            comment line lacks ``Charge:`` or ``Mul:``.

    ``self.strucsfolder`` and ``self.cdir`` are joined to file names by
    plain concatenation, so they must end with a path separator. Species,
    charge and multiplicity are taken from the first frame only.
    """

    def percent(part, whole):
        # Report 0.00 rather than dividing by zero when nothing was evaluated.
        return 100.0 * part / whole if whole else 0.0

    def read_frames(path):
        # Parse one multi-frame xyz file into (species, charge, mult, coords).
        with open(path, 'r') as fil:
            lines = fil.readlines()

        nat = int(lines[0])
        stride = nat + 2  # atom-count line + comment line + nat atom lines
        nconfs = int(round(len(lines) / stride))
        if nconfs < 1:
            raise ValueError('Error: no complete conformation found in %s' % path)

        header = lines[1]
        if "Charge:" not in header or "Mul:" not in header:
            raise ValueError('Error: the first comment line in %s must have charge and multiplicity. Please add something like " Charge: 0 Mul: 1 "' % path)
        chg = header.split("Charge:")[1].split()[0]
        mul = header.split("Mul:")[1].split()[0]

        spc = [lines[2 + i].split()[0] for i in range(nat)]

        crds = []
        for conf in range(nconfs):
            first = conf * stride + 2
            crd = []
            for i in range(nat):
                var = lines[first + i].split()
                crd.append([float(var[1]), float(var[2]), float(var[3])])
            crds.append(crd)
        return spc, chg, mul, np.asarray(crds, dtype=np.float32)

    # The context manager guarantees the report is closed on any error.
    with open(self.ldtdir + self.datdir + '/info_data_strucs.nfo', 'w') as of:
        # The original also built an aat.diverseconformers object here that
        # was never used; it is omitted.
        anicv = aat.anicrossvalidationconformer(self.netdict['cnstfile'],
                                                self.netdict['saefile'],
                                                self.netdict['nnfprefix'],
                                                self.netdict['num_nets'],
                                                [gpuid], False)

        files = sorted(os.listdir(self.strucsfolder))

        Nkt = 0  # frames above threshold (grand total)
        Ntt = 0  # frames evaluated (grand total)
        for cnt, f in enumerate(files, start=1):
            print(f)
            path = self.strucsfolder + f
            spc, chg, mul, crds = read_frames(path)

            # Select up to N random structures, if needed. Sampling without
            # replacement replaces the deprecated random_integers rejection
            # loop; indices are sorted to keep the file's frame order.
            nconfs = crds.shape[0]
            if nconfs > N:
                chosen = np.sort(np.random.choice(nconfs, size=max(N, 0),
                                                  replace=False))
                crds = crds[chosen]

            # Filter by QBC
            sigma = anicv.compute_stddev_conformations(crds, spc)
            sid = np.where(sigma > sig)[0]

            Ntt += sigma.size
            Nkt += sid.size
            of.write(str(cnt) + ' of ' + str(len(files)) + ') file: '
                     + str(path) + '\n')
            of.write(' -Total: ' + str(sid.size) + ' of ' + str(sigma.size)
                     + ' percent: '
                     + "{:.2f}".format(percent(sid.size, sigma.size)) + '\n')
            of.flush()

            if sid.size > 0:
                cfn = f.split('.')[0] + '_strucs.xyz'
                cmts = [' ' + chg + ' ' + mul for _ in range(sid.size)]
                hdt.writexyzfilewc(self.cdir + cfn, crds[sid], spc, cmts)

        # Drop the model wrapper explicitly, as normal_mode_sampling does.
        del anicv

        of.write('\nGrand Total: ' + str(Nkt) + ' of ' + str(Ntt) + ' percent: '
                 + "{:.2f}".format(percent(Nkt, Ntt)) + '\n')