Esempio n. 1
0
    def generate_dhl_samples(self,MaxNa=25,fmax=0.005,fpref='dhl_scan-', freqname="vib."):
        """Run the torsional sampler on every index set and write the results.

        A scanner is built from ``self.ens`` and ``fmax``. For each entry
        returned by ``self.get_index_set`` (entry[0] is handed to the sampler
        as the molecule, entry[1] as the index list), the sampler is called
        and, when it returns a non-empty energy array, the structures are
        written to ``self.storedir + fpref + <3-digit counter> + '.xyz'``.

        Args:
            MaxNa: Forwarded to ``self.get_index_set`` as ``MaxNa``.
            fmax: Forwarded to the ``ani_tortion_scanner`` constructor.
            fpref: File-name prefix of the written xyz files.
            freqname: Forwarded to ``tortional_sampler`` as ``freqname``.
        """
        scanner = ani_tortion_scanner(self.ens, fmax)
        index_sets = self.get_index_set(self.smiles, self.Nmol, MaxNa=MaxNa)

        for n, entry in enumerate(index_sets):
            mol = entry[0]
            atom_ids = entry[1]

            # NOTE(review): 10.0 and 36 are passed through positionally; their
            # meaning is defined by tortional_sampler -- confirm there.
            X, S, p, e, s = scanner.tortional_sampler(
                mol, self.Nsamp, atom_ids, 10.0, 36,
                sigma=self.sigma, rng=self.rng, freqname=freqname)

            # Nothing to write when the sampler produced no energies.
            if not e.size > 0:
                continue

            # One identical comment line (SMILES plus the index list) per frame.
            label = Chem.MolToSmiles(mol) + '[' + ' '.join(map(str, atom_ids)) + ']'
            comments = [label] * (e.size * self.Nsamp)

            out_path = self.storedir + fpref + str(n).zfill(3) + '.xyz'
            hdt.writexyzfilewc(out_path, xyz=X, typ=S, cmt=comments)
Esempio n. 2
0
    def mol_dyn_sampling(self,md_work, i, N, T1, T2, dt, Nc, Ns, sig, gpuid):
        """Generate conformations for each input file and log the outcome.

        For every path in ``md_work`` the structure is read with
        ``hdt.read_rcdb_coordsandnm``, handed to an
        ``aat.moldynactivelearning`` instance, and the conformations returned
        by ``generate_conformations`` are written (when non-empty) to
        ``self.cdir`` as ``mds_<name>_<ii><dddd>.xyz``. A log is written to
        ``<ldtdir><datdir>/info_data_mdso-<i>.nfo``.

        Args:
            md_work: Array of input file paths (must expose ``.size``).
            i: Worker/job index; used in the log name and output file names.
            N, T1, T2, dt, Nc, Ns: Forwarded positionally to
                ``generate_conformations``.
            sig: Forwarded to ``generate_conformations`` as ``dS``.
            gpuid: Forwarded to the ``moldynactivelearning`` constructor.
        """
        activ = aat.moldynactivelearning(self.netdict['cnstfile'],
                                         self.netdict['saefile'],
                                         self.netdict['nnfprefix'],
                                         self.netdict['num_nets'],
                                         gpuid)

        n_mol = md_work.size
        fail_time_total = 0.0
        info_path = self.ldtdir + self.datdir + '/info_data_mdso-' + str(i) + '.nfo'

        # The context manager closes the log even if a molecule fails.
        with open(info_path, 'w') as difo:
            difo.write('MD Sampler running: ' + str(n_mol) + '\n')

            for di, path in enumerate(md_work):
                data = hdt.read_rcdb_coordsandnm(path)
                S = data["species"]

                # Charge and multiplicity are used only when both are present;
                # otherwise fall back to a neutral singlet.
                if "charge" in data and "multip" in data:
                    chg = data["charge"]
                    mlt = data["multip"]
                else:
                    chg = "0"
                    mlt = "1"

                # Set mols
                activ.setmol(data["coordinates"], S)

                # Generate conformations
                X = activ.generate_conformations(N, T1, T2, dt, Nc, Ns, dS=sig)

                fail_time_total += activ.failtime

                # Base name without directory and last extension. Using [-1]
                # also accepts paths that contain no '/' at all.
                m = path.rsplit('/', 1)[-1].rsplit('.', 1)[0]
                difo.write('  -' + m + ': ' + activ._infostr_ + '\n')
                difo.flush()

                if X.size > 0:
                    out_name = ('mds_' + m.split('.')[0] + '_' +
                                str(i).zfill(2) + str(di).zfill(4) + '.xyz')
                    hdt.writexyzfilewc(self.cdir + out_name, X, S,
                                       ' ' + chg + ' ' + mlt)

            # Guard against an empty work list (previously ZeroDivisionError).
            mean_fail = fail_time_total / float(n_mol) if n_mol else 0.0
            difo.write('Complete mean fail time: ' +
                       "{:.2f}".format(mean_fail) + '\n')

        print(n_mol)
        del activ
Esempio n. 3
0
    def prepare_confs_iso(self):
        """Group conformer xyz files by composition and rewrite them in chunks.

        Every ``*.xyz`` file in ``<ldtdir><datdir>/confs/`` is read with
        ``hdt.readxyz2``; its atoms are reordered so the species labels are
        sorted, and the file is binned under the concatenated sorted species
        string. One line per input file
        (``<file> <formula> <index within bin> <number of frames>``) is
        written to ``<ldtdir><datdir>/info_confstoiso_map.dat``.

        Each bin is then stacked and written to ``<ldtdir><datdir>/confs_iso/``:
        a single file when it holds fewer than 40 frames, otherwise
        ``ceil(N / 40)`` files produced with ``np.array_split``.
        """
        prefix = 'ad'
        max_per_file = 40  # bins with at least this many frames are split
        base_dir = self.ldtdir + self.datdir
        confs_dir = base_dir + '/confs/'
        isoms_dir = base_dir + '/confs_iso/'

        files = [f for f in os.listdir(confs_dir)
                 if f.rsplit('.', maxsplit=1)[-1] == 'xyz']

        ds = dict()
        # The context manager closes the map file even if reading fails.
        with open(base_dir + '/info_confstoiso_map.dat', 'w') as of:
            for f in files:
                X, S, N, C = hdt.readxyz2(confs_dir + f)
                S = np.array(S)

                # Stable ordering of atoms by species label, applied to the
                # labels and to the atom axis of every frame.
                idx = sorted(range(len(S)), key=lambda k: S[k])
                S = S[np.array(idx)]
                X = X[:, idx]

                key = "".join(S)

                # setdefault covers both the "new bin" and "existing bin"
                # cases; len(group) is 0 for a new bin.
                group = ds.setdefault(key, [])
                of.write(f + ' ' + convert_eformula(S) + ' ' +
                         str(len(group)) + ' ' + str(X.shape[0]) + '\n')
                group.append((X, S, C))

        for group in ds.values():
            X = np.vstack([entry[0] for entry in group])
            S = list(group[0][1])
            C = []
            for entry in group:
                C.extend(entry[2])

            N = X.shape[0]
            formula = convert_eformula(S)

            if N < max_per_file:
                fn = prefix + '_' + formula + '-' + str(N).zfill(3) + '.xyz'
                # isoms_dir already ends with '/', so no extra separator.
                hdt.writexyzfilewc(isoms_dir + fn, X, S, C)
            else:
                Nsplit = int(math.ceil(N / float(max_per_file)))
                x_chunks = np.array_split(X, Nsplit)
                c_chunks = np.array_split(np.array(C), Nsplit)
                for l, (x, c) in enumerate(zip(x_chunks, c_chunks)):
                    fn = (prefix + '_' + formula + '_' + str(l).zfill(2) +
                          '-' + str(x.shape[0]).zfill(3) + '.xyz')
                    hdt.writexyzfilewc(isoms_dir + fn, x, S, c)
Esempio n. 4
0
    def normal_mode_sampling(self, T, Ngen, Nkep, maxd, sig, gpuid):
        """Sample structures per input file and keep those above ``sig``.

        For every file in every directory of ``self.idir`` the structure is
        read with ``hdt.read_rcdb_coordsandnm``, ``Ngen`` random structures
        are requested from ``nmt.nmsgenerator``, optionally reduced to
        ``Nkep`` via ``dc.get_divconfs_ids``, and the values returned by
        ``anicv.compute_stddev_conformations`` are compared with ``sig``.
        Structures above the threshold are written to ``self.cdir``. Totals
        are logged to ``<ldtdir><datdir>/info_data_nms.nfo``.

        Input file names must contain a '-' before the first '.', because the
        output name is assembled from the parts on both sides of it.

        Args:
            T: Forwarded to ``nmt.nmsgenerator``.
            Ngen: Number of random structures requested per input file.
            Nkep: Maximum number of structures kept before the sigma filter.
            maxd: Forwarded to ``nmt.nmsgenerator`` as ``maxd``.
            sig: Threshold; structures with a larger value are written out.
            gpuid: Device id forwarded to the network wrappers.
        """
        # The context manager closes the log even if a file fails to process.
        with open(self.ldtdir + self.datdir + '/info_data_nms.nfo', 'w') as of:
            aevsize = self.netdict['aevsize']

            anicv = aat.anicrossvalidationconformer(self.netdict['cnstfile'],
                                                    self.netdict['saefile'],
                                                    self.netdict['nnfprefix'],
                                                    self.netdict['num_nets'],
                                                    [gpuid], False)

            dc = aat.diverseconformers(self.netdict['cnstfile'],
                                       self.netdict['saefile'],
                                       self.netdict['nnfprefix'] + '0/networks/',
                                       aevsize, gpuid, False)

            Nkp = 0  # structures actually written
            Nkt = 0  # structures above threshold, all directories
            Ntt = 0  # structures evaluated, all directories
            idx = 0  # running input-file counter used in output names
            for di, folder in enumerate(self.idir):
                of.write(
                    str(di) + ' of ' + str(len(self.idir)) + ') dir: ' +
                    str(folder) + '\n')
                files = sorted(os.listdir(folder))

                Nk = 0
                Nt = 0
                for f in files:
                    print(f)
                    data = hdt.read_rcdb_coordsandnm(folder + f)

                    spc = data["species"]
                    xyz = data["coordinates"]
                    nmc = data["nmdisplacements"]
                    frc = data["forceconstant"]

                    # Charge and multiplicity are used only when both are
                    # present; otherwise fall back to a neutral singlet.
                    if "charge" in data and "multip" in data:
                        chg = data["charge"]
                        mlt = data["multip"]
                    else:
                        chg = "0"
                        mlt = "1"

                    nms = nmt.nmsgenerator(xyz,
                                           nmc,
                                           frc,
                                           spc,
                                           T,
                                           minfc=5.0E-2,
                                           maxd=maxd)
                    conformers = nms.get_Nrandom_structures(Ngen)

                    if conformers.shape[0] > 0:
                        if conformers.shape[0] > Nkep:
                            ids = dc.get_divconfs_ids(conformers, spc, Ngen,
                                                      Nkep, [])
                            conformers = conformers[ids]

                        sigma = anicv.compute_stddev_conformations(
                            conformers, spc)
                        sid = np.where(sigma > sig)[0]

                        Nt += sigma.size
                        Nk += sid.size
                        if sid.size > 0:
                            Nkp += sid.size
                            name_parts = f.split('.')[0].split('-')
                            cfn = (name_parts[0] + '_' + str(idx).zfill(5) +
                                   '-' + name_parts[1] + '_2.xyz')
                            # One comment per written frame. The previous code
                            # used the running per-directory counter Nk here,
                            # which over-counts after the first kept file.
                            cmts = [' ' + chg + ' ' + mlt] * sid.size
                            hdt.writexyzfilewc(self.cdir + cfn,
                                               conformers[sid], spc, cmts)
                    idx += 1

                Nkt += Nk
                Ntt += Nt
                # Guard against directories in which nothing was evaluated.
                dir_pct = 100.0 * Nk / Nt if Nt else 0.0
                of.write('    -Total: ' + str(Nk) + ' of ' + str(Nt) +
                         ' percent: ' + "{:.2f}".format(dir_pct) + '\n')
                of.flush()

            del anicv
            del dc

            total_pct = 100.0 * Nkt / Ntt if Ntt else 0.0
            of.write('\nGrand Total: ' + str(Nkt) + ' of ' + str(Ntt) +
                     ' percent: ' + "{:.2f}".format(total_pct) + ' Kept ' +
                     str(Nkp) + '\n')
Esempio n. 5
0
    def structural_sampling(self, N, sig, gpuid):
        """Filter the structures in ``self.strucsfolder`` by the ``sig`` threshold.

        Each file in ``self.strucsfolder`` is parsed as a multi-frame xyz
        file (atom count on line 1, comment on line 2, then one atom per
        line). The first comment line must contain ``Charge:`` and ``Mul:``.
        Species are taken from the first frame only. When a file holds more
        than ``N`` frames, ``N`` distinct frames are drawn at random (kept in
        file order). Frames whose value from
        ``anicv.compute_stddev_conformations`` exceeds ``sig`` are written to
        ``self.cdir`` as ``<name>_strucs.xyz``. Totals are logged to
        ``<ldtdir><datdir>/info_data_strucs.nfo``.

        Args:
            N: Maximum number of frames evaluated per file.
            sig: Threshold; frames with a larger value are written out.
            gpuid: Device id forwarded to the network wrappers.

        Raises:
            ValueError: If a file contains no complete frame, or its first
                comment line lacks the charge/multiplicity tags.
        """
        # The context manager closes the log even if a file fails to parse.
        with open(self.ldtdir + self.datdir + '/info_data_strucs.nfo', 'w') as of:
            aevsize = self.netdict['aevsize']

            anicv = aat.anicrossvalidationconformer(self.netdict['cnstfile'],
                                                    self.netdict['saefile'],
                                                    self.netdict['nnfprefix'],
                                                    self.netdict['num_nets'],
                                                    [gpuid], False)

            # NOTE(review): dc is not used below; the construction is kept
            # because it may have side effects -- confirm before removing.
            dc = aat.diverseconformers(self.netdict['cnstfile'],
                                       self.netdict['saefile'],
                                       self.netdict['nnfprefix']+'0/networks/',
                                       aevsize,
                                       gpuid, False)

            files = sorted(os.listdir(self.strucsfolder))

            Nkt = 0  # frames above threshold, all files
            Ntt = 0  # frames evaluated, all files
            for cnt, f in enumerate(files, start=1):
                print(f)
                path = self.strucsfolder + f
                with open(path, 'r') as fil:
                    lines = fil.readlines()

                # Reading all conformations
                nat = int(lines[0])
                frame_len = nat + 2
                nconfs = int(round(len(lines) / frame_len))
                if nconfs < 1:
                    # Without this check the species/charge of the previous
                    # file would silently be reused for this one.
                    raise ValueError(
                        'Error: no complete conformation found in %s' % path)

                header = lines[1]
                if (not re.search("Charge:", header) or not re.search("Mul:", header)):
                    raise ValueError('Error: the first comment line in %s must have charge and multiplicity. Please add something like " Charge: 0 Mul: 1 "'%(self.strucsfolder+f))
                chg = header.split("Charge:")[1].split()[0]
                mul = header.split("Mul:")[1].split()[0]

                spc = []
                crds = []
                for conf in range(nconfs):
                    start = conf * frame_len + 2
                    crd = []
                    for i in range(nat):
                        var = lines[start + i].split()
                        if conf == 0:
                            spc.append(var[0])
                        crd.append([float(var[1]), float(var[2]), float(var[3])])
                    crds.append(crd)

                # Select up to N random structures, if needed. choice() with
                # replace=False draws N distinct indices in one call.
                if nconfs > N:
                    keep = np.sort(np.random.choice(nconfs, size=N, replace=False))
                    crds = [crds[k] for k in keep]

                # Converting list to numpy array
                crds = np.asarray(crds, dtype=np.float32)

                # Filter by QBC
                sigma = anicv.compute_stddev_conformations(crds, spc)
                sid = np.where(sigma > sig)[0]

                Ntt += sigma.size
                Nkt += sid.size
                file_pct = 100.0 * sid.size / sigma.size if sigma.size else 0.0
                of.write(str(cnt)+' of '+str(len(files))+') file: '+ str(self.strucsfolder+f) +'\n')
                of.write('    -Total: '+str(sid.size)+' of '+str(sigma.size)+' percent: '+"{:.2f}".format(file_pct)+'\n')
                of.flush()
                if sid.size > 0:
                    cfn = f.split('.')[0]+'_strucs.xyz'
                    cmts = [' '+chg+' '+mul] * sid.size
                    hdt.writexyzfilewc(self.cdir+cfn, crds[sid], spc, cmts)

            # Guard against an empty folder (previously ZeroDivisionError).
            total_pct = 100.0 * Nkt / Ntt if Ntt else 0.0
            of.write('\nGrand Total: '+ str(Nkt)+ ' of '+ str(Ntt)+' percent: '+"{:.2f}".format(total_pct)+'\n')
Esempio n. 6
0
    def __fragmentbox__(self, file, sighat):
        """Cut molecular clusters out of the box and keep the uncertain ones.

        For each molecule ``i`` whose centroid lies at least ``CUT + di``
        inside the box on every axis, a fragment is assembled from the
        molecule plus every other molecule ``j`` (visited in random order)
        whose centroid is inside the box by ``dj``, whose centroid distance
        is below ``di + dj + CUT`` and whose closest atom pair is between
        0.5 and ``CUT`` apart. The fragment energy is evaluated with every
        member of ``self.aens.ncl``; the standard deviation of those energies
        (scaled by ``hdn.hatokcal``) divided by the square root of the number
        of atoms in the fragment is compared with ``sighat``. Fragments above
        the threshold are written to ``file + <4-digit i> + '.xyz'`` and
        appended to ``self.frag_list``.

        Resets and updates ``self.X``, ``self.frag_list``, ``self.Nt``
        (fragments evaluated), ``self.Nd`` (fragments above threshold) and
        ``self.maxsig`` (largest value seen above threshold).

        Args:
            file: Path prefix of the xyz files that are written.
            sighat: Threshold above which a fragment is kept.
        """
        self.X = self.mol.get_positions()

        self.frag_list = []

        self.Nd = 0
        self.Nt = 0

        self.maxsig = 0

        CUT = 7.0
        n_mols = len(self.Na)

        for i in range(n_mols):
            # NOTE(review): ctd[i][2] is used as the first-atom offset and
            # ctd[i][1] as a distance margin -- presumably a molecular
            # radius; confirm where ctd is built.
            si = self.ctd[i][2]
            di = self.ctd[i][1]
            Nai = self.Na[i]
            Xi = self.X[si:si + Nai, :]
            Xci = np.sum(Xi, axis=0) / Nai

            # Skip molecules too close to the box faces.
            if not (np.all(Xci > CUT + di) and
                    np.all(Xci <= self.L - (CUT + di))):
                continue

            Xf = Xi
            # list() gives a private copy to extend, whatever sequence type
            # self.S is.
            Sf = list(self.S[si:si + Nai])

            Nmax = np.random.randint(100, 101)
            Nmol = 0
            ridx = np.arange(n_mols)
            np.random.shuffle(ridx)
            for j in ridx:
                if i != j:
                    sj = self.ctd[j][2]
                    dj = self.ctd[j][1]
                    Naj = self.Na[j]
                    Xj = self.X[sj:sj + Naj, :]
                    Xcj = np.sum(Xj, axis=0) / Naj

                    if np.all(Xcj > dj) and np.all(Xcj < self.L - dj):
                        centroid_dist = np.linalg.norm(Xci - Xcj)
                        if centroid_dist < di + dj + CUT:
                            # Closest atom-atom distance between the two
                            # molecules, capped at 10.0 as before.
                            pair_d = np.linalg.norm(
                                Xi[:, None, :] - Xj[None, :, :], axis=2)
                            minl = min(10.0, pair_d.min())

                            if 0.5 < minl < CUT:
                                Xf = np.vstack([Xf, Xj])
                                Sf.extend(self.S[sj:sj + Naj])
                                Nmol += 1
                    if Nmol >= Nmax:
                        break

            E = np.empty(len(self.aens.ncl), dtype=np.float64)
            for idx, nc in enumerate(self.aens.ncl):
                nc.setMolecule(coords=np.array(Xf, dtype=np.float32),
                               types=Sf)
                E[idx] = nc.energy()[0]

            # Normalise by the atom count of the whole fragment. The previous
            # code used Nai + Naj, where Naj was whatever molecule the loop
            # above visited last (and undefined if none was visited).
            # NOTE(review): presumably hatokcal converts Hartree to kcal/mol.
            sig = np.std(hdn.hatokcal * E) / np.sqrt(len(Sf))

            self.Nt += 1
            if sig > sighat:
                if sig > self.maxsig:
                    self.maxsig = sig
                self.Nd += 1
                hdn.writexyzfilewc(file + str(i).zfill(4) + '.xyz',
                                   Xf.reshape(1, Xf.shape[0], 3),
                                   Sf,
                                   cmt=['sigma:' + str(sig)])
                self.frag_list.append({'coords': Xf, 'spec': Sf})