Exemple #1
0
    def prepare_confs_iso(self):
        """Pool the conformer XYZ files by composition and rewrite them in batches.

        Every ``*.xyz`` file found in ``<ldtdir><datdir>/confs/`` is read with
        ``hdt.readxyz2``.  The atoms of each file are reordered so that the
        species labels are sorted, and files whose sorted species strings are
        identical are pooled together.  One line per input file (file name,
        ``convert_eformula`` result, position inside its pool, number of
        structures) is written to ``<ldtdir><datdir>/info_confstoiso_map.dat``.

        Each pool is then stacked and written to ``<ldtdir><datdir>/confs_iso/``.
        A pool with fewer than 96 structures goes into a single file; larger
        pools are divided with ``np.array_split`` into ``ceil(N / 96)`` files.
        """
        prefix = 'ad'
        max_per_file = 96
        base_dir = self.ldtdir + self.datdir
        confs_dir = base_dir + '/confs/'
        isoms_dir = base_dir + '/confs_iso/'

        files = [
            f for f in os.listdir(confs_dir)
            if f.rsplit('.', maxsplit=1)[-1] == 'xyz'
        ]

        # Sorted-species string -> list of (X, S) pairs that share it.
        ds = dict()
        # The context manager closes the map file even when reading or
        # converting one of the inputs raises part-way through.
        with open(base_dir + '/info_confstoiso_map.dat', 'w') as of:
            for f in files:
                X, S, N, C = hdt.readxyz2(confs_dir + f)
                S = np.array(S)

                # Sort atoms by species label so equal compositions produce
                # the same key regardless of the atom order in the file.
                idx = np.array(sorted(range(len(S)), key=lambda k: S[k]))
                S = S[idx]
                for j, x in enumerate(X):
                    X[j] = x[idx]

                key = "".join(S)
                pool = ds.setdefault(key, [])
                # len(pool) is the index this file gets inside its pool
                # (0 for the first file of a new composition).
                of.write(f + ' ' + convert_eformula(S) + ' ' + str(len(pool)) + ' ' + str(X.shape[0]) + '\n')
                pool.append((X, S))

        for pool in ds.values():
            X = np.vstack([x for x, _ in pool])
            # All members of a pool share the same sorted species list.
            S = list(pool[0][1])
            N = X.shape[0]
            formula = convert_eformula(S)

            if N < max_per_file:
                fn = prefix + '_' + formula + '-' + str(N).zfill(3) + '.xyz'
                hdt.writexyzfile(isoms_dir + '/' + fn, X, S)
            else:
                Nsplit = int(math.ceil(N / float(max_per_file)))
                for l, x in enumerate(np.array_split(X, Nsplit)):
                    fn = prefix + '_' + formula + '_' + str(l).zfill(2) + '-' + str(x.shape[0]).zfill(3) + '.xyz'
                    hdt.writexyzfile(isoms_dir + '/' + fn, x, S)
Exemple #2
0
 def TS_sampling(self, tid, TS_infiles, tsparams, gpuid):
     """Run repeated MD sampling on each input file and keep the failed runs.

     For every path in ``TS_infiles`` ``activ.run_md`` is called
     ``tsparams['n_samples']`` times.  The structure returned by each job
     that reports ``fail`` is collected, and the collected structures are
     written to ``self.cdir`` under the input's base name.  A line per job
     and a summary per input file are written to
     ``<ldtdir><datdir>/info_tssampler-<tid>.nfo``.

     Args:
         tid: Identifier used in the info file name.
         TS_infiles: Iterable of input file paths; also handed to the
             ``aat.MD_Sampler`` constructor.  For each path the file with
             the same stem and a ``.log`` extension is passed as ``nmfile``.
         tsparams: Mapping providing the keys ``'T'``, ``'sig'``,
             ``'n_samples'``, ``'n_steps'``, ``'steps'``, ``'min_steps'``,
             ``'normalmode'`` and ``'displacement'``.
         gpuid: Forwarded to the ``aat.MD_Sampler`` constructor.

     Note:
         ``tsparams['n_samples']`` must be non-zero; the summary means are
         divided by it.
     """
     activ = aat.MD_Sampler(TS_infiles, self.netdict['cnstfile'],
                            self.netdict['saefile'],
                            self.netdict['nnfprefix'],
                            self.netdict['num_nets'], gpuid)
     T = tsparams['T']
     sig = tsparams['sig']
     Ns = tsparams['n_samples']
     n_steps = tsparams['n_steps']
     steps = tsparams['steps']
     min_steps = tsparams['min_steps']
     nm = tsparams['normalmode']
     displacement = tsparams['displacement']

     info_path = (self.ldtdir + self.datdir + '/info_tssampler-' +
                  str(tid) + '.nfo')
     # The context manager guarantees the info file is closed even when a
     # sampling job or a write raises.
     with open(info_path, 'w') as difo:
         for f in TS_infiles:
             X = []
             ftme_t = 0.0
             fail_count = 0
             sumsig = 0.0
             for i in range(Ns):
                 x, S, t, stddev, fail, temp = activ.run_md(
                     f,
                     T,
                     steps,
                     n_steps,
                     nmfile=f.rsplit(".", 1)[0] + '.log',
                     displacement=displacement,
                     min_steps=min_steps,
                     sig=sig,
                     nm=nm)
                 sumsig += stddev
                 if fail:
                     difo.write('Job ' + str(i) + ' failed in ' +
                                "{:.2f}".format(t) + 'fs Sigma: ' +
                                "{:.2f}".format(stddev) + ' SetTemp: ' +
                                "{:.2f}".format(temp) + '\n')
                     # Only structures from failed jobs are kept.
                     X.append(x[np.newaxis, :, :])
                     fail_count += 1
                 else:
                     difo.write('Job ' + str(i) + ' succeeded.\n')
                 ftme_t += t

             summary = ('Complete mean fail time: ' +
                        "{:.2f}".format(ftme_t / float(Ns)) + ' failed ' +
                        str(fail_count) + '/' + str(Ns))
             print(summary + '\n')
             difo.write(summary + ' MeanSig: ' +
                        "{:.2f}".format(sumsig / float(Ns)) + '\n')

             # np.vstack raises ValueError on an empty list, so an input for
             # which every job succeeded simply produces no XYZ file.
             if X:
                 hdt.writexyzfile(self.cdir + os.path.basename(f),
                                  np.vstack(X), S)
         del activ
Exemple #3
0
    def mol_dyn_sampling(self, md_work, i, N, T1, T2, dt, Nc, Ns, sig, gpuid):
        """Generate MD conformations for every molecule file in ``md_work``.

        Each entry of ``md_work`` is read with ``hdt.read_rcdb_coordsandnm``,
        loaded into an ``aat.moldynactivelearning`` sampler and passed through
        ``generate_conformations``.  Non-empty results are written to
        ``self.cdir`` as ``mds_<name>_<i><di>.xyz``; the sampler's info string
        for every molecule and the mean fail time are logged to
        ``<ldtdir><datdir>/info_data_mdso-<i>.nfo``.

        Args:
            md_work: Array-like of input file paths (its ``.size`` is used as
                the molecule count).
            i: Worker index used in the info file name and output file names.
            N, T1, T2, dt, Nc, Ns: Forwarded positionally to
                ``generate_conformations``.
            sig: Forwarded to ``generate_conformations`` as ``dS``.
            gpuid: Forwarded to the ``aat.moldynactivelearning`` constructor.
        """
        activ = aat.moldynactivelearning(self.netdict['cnstfile'],
                                         self.netdict['saefile'],
                                         self.netdict['nnfprefix'],
                                         self.netdict['num_nets'], gpuid)

        info_path = (self.ldtdir + self.datdir + '/info_data_mdso-' + str(i) +
                     '.nfo')
        Nmol = md_work.size
        ftme_t = 0.0

        # The context manager closes the info file even if a molecule fails
        # to load or the sampler raises.
        with open(info_path, 'w') as difo:
            difo.write('MD Sampler running: ' + str(Nmol) + '\n')

            for di, path in enumerate(md_work):
                data = hdt.read_rcdb_coordsandnm(path)
                S = data["species"]

                # Set mols
                activ.setmol(data["coordinates"], S)

                # Generate conformations
                X = activ.generate_conformations(N, T1, T2, dt, Nc, Ns, dS=sig)

                ftme_t += activ.failtime

                nfo = activ._infostr_
                # File stem; [-1] also handles a path without any '/'.
                m = path.rsplit('/', 1)[-1].rsplit('.', 1)[0]
                difo.write('  -' + m + ': ' + nfo + '\n')
                difo.flush()

                if X.size > 0:
                    hdt.writexyzfile(
                        self.cdir + 'mds_' + m.split('.')[0] + '_' +
                        str(i).zfill(2) + str(di).zfill(4) + '.xyz', X, S)

            # An empty work list has no mean; report 0.00 instead of dividing
            # by zero.
            mean_fail = ftme_t / float(Nmol) if Nmol else 0.0
            difo.write('Complete mean fail time: ' +
                       "{:.2f}".format(mean_fail) + '\n')
            print(Nmol)
            del activ
Exemple #4
0
# Destination root for the exported structures and the SDF file they come from.
storedir = '/home/jujuman/Research/extensibility_test_sets/drugbank/'
sdf_path = '/home/jujuman/Dropbox/ChemSciencePaper.AER/Benchmark_Datasets/drugbank/drugbank_3d_1564.sdf'

# removeHs=False is passed so the supplier does not strip hydrogens.
suppl = Chem.SDMolSupplier(sdf_path, removeHs=False)

for id, m in enumerate(suppl):
    # Entries the supplier yields as None are skipped; their index is still
    # consumed, so output numbering follows the position in the SDF file.
    if m is not None:
        name = m.GetProp('_Name')
        xyz, spc = pya.__convert_rdkitconfs_to_nparr__(m)

        print(xyz.shape)
        print(name, id, spc)

        # All conformers go to the XYZ file ...
        xyz_path = storedir + 'xyz/drugbank_' + str(id).zfill(4) + '.xyz'
        hdt.writexyzfile(xyz_path, xyz, spc)

        # ... while only the first one is used for the rcdb input.
        hdt.write_rcdb_input(
            xyz[0], spc, id, storedir, 'drugbank', 10, 'wb97x/6-31g*',
            '300.0', fill=4, comment='Name: ' + name)
Exemple #5
0
def plot_irc(axes, i, d, f):
    """Plot the IRC profile read from ``d + f`` against two network ensembles.

    The IRC file is read with ``pyg.read_irc``; the first geometry/energy and
    the last ``Rc`` row are dropped and the remaining geometries are written
    to ``c + <stem>.xyz``.  When ``Rc`` holds more than 10 values, every
    network in the module-level ``nc1`` and ``nc2`` lists evaluates the path,
    the per-ensemble mean and standard deviation are computed, error metrics
    are appended to the module-level lists ``bar1``, ``bar2``, ``rmp1``,
    ``rmp2``, ``Ec1``, ``Ec2`` and ``Ea``, and the curves are drawn on
    ``axes``.

    Args:
        axes: Matplotlib axes-like object to draw on.
        i: Index shown in the printed summary and in the blue curve's label.
        d: Directory prefix of the IRC file.
        f: IRC file name; it must contain ``'IRC'``.
    """
    Eact, xyz, spc, Rc = pyg.read_irc(d + f)
    Eact = hdt.hatokcal * Eact

    # Trim the first point of the geometries/energies and the last Rc row.
    xyz, Eact, Rc = xyz[1:], Eact[1:], Rc[:-1]

    # Unused below, but evaluating it still raises if 'IRC' is not in f.
    s_idx = f.split('IRC')[1].split('.')[0]
    hdt.writexyzfile(c + f.split('.')[0] + '.xyz', xyz, spc)

    if Rc.size > 10:
        # ------------ ensemble 1 -----------
        per_net1 = []
        for net in nc1:
            net.setConformers(confs=xyz, types=list(spc))
            per_net1.append(hdt.hatokcal * net.energy())

        # ------------ ensemble 2 -----------
        per_net2 = []
        for net in nc2:
            net.setConformers(confs=xyz, types=list(spc))
            per_net2.append(hdt.hatokcal * net.energy())

        # Spread across networks (reversed to match the plotted order) and
        # the ensemble-mean energy along the path.
        modl_std1 = np.std(per_net1, axis=0)[::-1]
        energies1 = np.mean(np.vstack(per_net1), axis=0)

        modl_std2 = np.std(per_net2, axis=0)[::-1]
        energies2 = np.mean(np.vstack(per_net2), axis=0)

        rmse1 = hdt.calculaterootmeansqrerror(energies1, Eact)
        rmse2 = hdt.calculaterootmeansqrerror(energies2, Eact)

        # Difference between the maximum and the first point of each curve.
        dba = Eact.max() - Eact[0]
        bar_err1 = abs((energies1.max() - energies1[0]) - dba)
        bar_err2 = abs((energies2.max() - energies2[0]) - dba)

        # Difference between the first and last point of each curve.
        rpa = Eact[0] - Eact[-1]
        rp_err1 = abs(rpa - (energies1[0] - energies1[-1]))
        rp_err2 = abs(rpa - (energies2[0] - energies2[-1]))

        bar1.append(bar_err1)
        bar2.append(bar_err2)

        rmp1.append(rp_err1)
        rmp2.append(rp_err2)

        Ec1.append(energies1)
        Ec2.append(energies2)
        Ea.append(Eact)

        print(i, ')', f, ':', len(spc), ':', rmse1, rmse2, 'R/P1: ',
              rp_err1, 'R/P2: ', rp_err2, 'Barrier1:',
              bar_err1, 'Barrier2:', bar_err2)

        # Curves shifted so that their first plotted point is zero.
        x_vals = Rc[:, 1]
        Rce = hdt.hatokcal * (Rc[:, 0] - Rc[:, 0][0])
        Rce1 = energies2[::-1] - energies2[::-1][0]
        Rce2 = energies1[::-1] - energies1[::-1][0]

        # NOTE(review): the x-limits are taken over the whole Rc array, i.e.
        # both the column plotted as energy and the column plotted as x -
        # confirm this is intended.
        axes.set_xlim([Rc.min(), Rc.max()])
        axes.set_ylim([Rce.min() - 1.0, Rce1.max() + 20.0])

        axes.plot(x_vals, Rce, color='Black', label='DFT')

        axes.errorbar(x_vals,
                      Rce1,
                      yerr=modl_std2,
                      fmt='--',
                      color='red',
                      label="ANI-1: " + "{:.1f}".format(bar2[-1]),
                      linewidth=2)
        axes.errorbar(x_vals,
                      Rce2,
                      yerr=modl_std1,
                      fmt='--',
                      color='blue',
                      label="[" + str(i) + "]: " + "{:.1f}".format(bar1[-1]),
                      linewidth=2)

        axes.legend(loc="upper left", fontsize=10)
        axes.set_title(str(f),
                       color='black',
                       fontdict={'weight': 'bold'},
                       x=0.8,
                       y=0.85)
    El = np.vstack(El)
    #dEm = np.sum(El, axis=0) / 3.0

    bad_idx = []
    god_idx = []
    for  i in range(Ea.shape[0]):
        test = np.array([True if j > 1.0 else False for j in El[:,i]])
        if np.all(test):
            bad_idx.append(i)
        else:
            god_idx.append(i)

    Nd = Nd + len(god_idx)
    Nb = Nb + len(bad_idx)

    if len(god_idx) != 0:
        # print(gn)
        dpack.store_data(data['parent'] + "/" + data['name'], coordinates=X[god_idx], energies=Ea[god_idx], species=S)

    if len(bad_idx) > 0:
        print(data['parent'],data['name'],'- Bad:',len(bad_idx),'of',len(god_idx))
        for i in bad_idx:
            print('(', i, ')', ':' ,El[:,i])

        hdn.writexyzfile(store_xyz+data['parent'] + '_' + data['name']+'.xyz',X[bad_idx],S)

dpack.cleanup()
print('Bad:',Nb,'of',Nd+Nb)

        EANI = np.asarray(EANI).flatten()

        # Max
        if abs(E.max()) > maxi:
            maxi = abs(E.max())

        # Min
        if abs(E.min()) < mini:
            mini = abs(E.min())

        #
        df_E.append(E)
        df_Ea.append(EANI)
        #print (df_E)

hdn.writexyzfile("coordlist.xyz", np.concatenate(coord_list), spec_list[0])

import matplotlib.pyplot as plt

df_E = hdn.hatokcal * np.array(df_E).flatten()
df_Ea = hdn.hatokcal * np.array(df_Ea).flatten()

rmse = hdn.calculaterootmeansqrerror(df_E, df_Ea)

x = list(range(0, df_E.shape[0]))

plt.scatter(df_D, df_E, label='DFT')
plt.scatter(df_D,
            df_Ea,
            color='red',
            label='ANI RMSE: ' + "{:.2f}".format(rmse) + 'kcal/mol',
Exemple #8
0
        sid = len(ds[id])
        of.write(f + ' ' + convert_eformula(S) + ' ' + str(sid) + ' ' +
                 str(X.shape[0]) + '\n')
        ds[id].append((X, S))
    else:
        of.write(f + ' ' + convert_eformula(S) + ' ' + str(0) + ' ' +
                 str(X.shape[0]) + '\n')
        ds.update({id: [(X, S)]})
of.close()
#print(i,len(ds))

Nt = 0
for i in ds.keys():
    X = []
    S = []
    for j in ds[i]:
        X.append(j[0])
        S.append(j[1])

    X = np.vstack(X)
    S = list(S[0])
    N = X.shape[0]

    Nt += N

    print(type(S), S)
    fn = prefix + '_' + convert_eformula(S) + '-' + str(N).zfill(5) + '.xyz'
    print('Writing: ', fn)
    hdt.writexyzfile(ndir + fn, X, S)
print('Total data:', Nt)
Exemple #9
0
    def __fragmentbox__(self, file, sig_cut):
        """Extract close molecule pairs whose ensemble energy spread is large.

        Positions are read from ``self.mol`` and stored in ``self.X``.  For
        every pair of molecules ``(i, j)`` with ``i < j`` whose centroids
        both satisfy ``d < centroid < self.L - d`` component-wise (``d``
        being ``self.ctd[k][1]``), whose centroid distance is below
        ``di + dj + 6.0`` and whose closest inter-atomic distance lies
        strictly between 1.1 and 4.2, the two molecules are combined into a
        centred fragment and evaluated by every network in
        ``self.aens.ncl``.  The spread ``std(hdn.hatokcal * E) /
        sqrt(n_atoms)`` is compared with ``sig_cut``; fragments above the
        cut are written to ``<file><i>-<j>.xyz`` and appended to
        ``self.frag_list``.

        ``self.Nt`` counts evaluated fragments and ``self.Nd`` those above
        the cut.

        Args:
            file: Path prefix for the written XYZ files.
            sig_cut: Threshold on the per-fragment spread.
        """
        self.X = self.mol.get_positions()

        self.frag_list = []

        self.Nd = 0
        self.Nt = 0

        n_mol = len(self.Na)
        # Size the energy buffer from the ensemble itself instead of assuming
        # exactly five networks.
        n_net = len(self.aens.ncl)

        for i in range(n_mol):
            si = self.ctd[i][2]
            di = self.ctd[i][1]
            Nai = self.Na[i]
            Xi = self.X[si:si + Nai, :]
            Xci = np.sum(Xi, axis=0) / Nai

            if not (np.all(Xci > di) and np.all(Xci < self.L - di)):
                continue

            for j in range(i + 1, n_mol):
                sj = self.ctd[j][2]
                dj = self.ctd[j][1]
                Naj = self.Na[j]
                Xj = self.X[sj:sj + Naj, :]
                Xcj = np.sum(Xj, axis=0) / Naj

                if not (np.all(Xcj > dj) and np.all(Xcj < self.L - dj)):
                    continue

                # Cheap centroid pre-filter before the all-pairs distances.
                dc = np.linalg.norm(Xci - Xcj)
                if not (dc < di + dj + 6.0):
                    continue

                # All inter-molecular atom-atom distances in one broadcast
                # (replaces the Python double loop).
                pair_d = np.linalg.norm(
                    Xi[:, np.newaxis, :] - Xj[np.newaxis, :, :], axis=2)
                if not pair_d.size:
                    continue
                closest = pair_d.min()

                if not (closest < 4.2 and closest > 1.1):
                    continue

                Xf = np.vstack([Xi, Xj])
                # Build a fresh species list for the fragment.
                Sf = list(self.S[si:si + Nai]) + list(self.S[sj:sj + Naj])

                # Centre the fragment on its centroid.
                Xcf = np.sum(Xf, axis=0) / (Nai + Naj)
                Xf = Xf - Xcf

                E = np.empty(n_net, dtype=np.float64)
                for k, nc in enumerate(self.aens.ncl):
                    nc.setMolecule(coords=np.array(Xf, dtype=np.float32),
                                   types=Sf)
                    E[k] = nc.energy()[0]

                sig = np.std(hdn.hatokcal * E) / np.sqrt(Nai + Naj)

                self.Nt += 1
                if sig > sig_cut:
                    self.Nd += 1
                    hdn.writexyzfile(
                        file + str(i).zfill(4) + '-' +
                        str(j).zfill(4) + '.xyz',
                        Xf.reshape(1, Xf.shape[0], 3), Sf)
                    self.frag_list.append({'coords': Xf, 'spec': Sf})
Exemple #10
0
# Excerpt of a longer script: mol, nc, C, optfile, xyzfile, trajfile and T are
# defined before this point (not shown here).

# Attach the ANI calculator to the molecule and hand it the network object.
mol.set_calculator(ANI(False))
mol.calc.setnc(nc)

# Optimize the molecule with LBFGS until the force criterion C is met,
# timing the run.
start_time = time.time()
dyn = LBFGS(mol)
dyn.run(fmax=C)
print('[ANI Total time:', time.time() - start_time, 'seconds]')

# Potential energy scaled by hdt.evtokcal.
print(hdt.evtokcal * mol.get_potential_energy())

# Save optimized mol as a single-frame array of shape (1, n_atoms, 3)
spc = mol.get_chemical_symbols()
pos = mol.get_positions(wrap=True).reshape(1, len(spc), 3)

hdt.writexyzfile(optfile, pos, spc)

# The script terminates here; everything below is currently unreachable.
exit(0)

# Open MD coordinate output
mdcrd = open(xyzfile, 'w')

# Open MD trajectory output
traj = open(trajfile, 'w')

# Langevin dynamics set up with 0.1 * units.fs, T * units.kB and 0.005 as
# positional arguments.
# NOTE(review): presumably time step, temperature (in energy units) and
# friction coefficient - confirm against the Langevin signature in use. An
# earlier version of this comment quoted 0.5 fs and 0.02, which does not
# match the values passed here.
dyn = Langevin(mol, 0.1 * units.fs, T * units.kB, 0.005)

# Run equilibration
Exemple #11
0
import hdnntools as hdt
import pyanitools as pyt
import os

# Source HDF5 file and root directory receiving one sub-directory per parent.
file = '/home/jujuman/Research/DataReductionMethods/model6/model0.05me/ani_red_c06.h5'
sdir = '/home/jujuman/Research/GDB-11-AL-wB97x631gd/'

aload = pyt.anidataloader(file)

for data in aload:
    X, S, P = data['coordinates'], data['species'], data['path']

    # P is split on '/': component 1 names the parent group, component 2
    # carries the molecule number after the text 'mol'.
    pieces = P.split('/')
    parent = pieces[1]
    index = pieces[2].split('mol')[1].zfill(7)

    # One directory per parent, created on first use.
    path = sdir + parent
    if not os.path.exists(path):
        os.mkdir(path)

    xyz_file = path + '/' + parent + '-' + index + '.xyz'
    print(xyz_file, 'DATA:', X.shape[0])
    hdt.writexyzfile(xyz_file, X, S)
Exemple #12
0
    xyz = data["coordinates"]
    nmc = data["nmdisplacements"]
    frc = data["forceconstant"]

    nms = nmt.nmsgenerator(xyz, nmc, frc, spc, T, minfc=1.0E-2)

    conformers = []
    for i in range(Ngen):
        conformers.append(nms.get_random_structure())
    conformers = np.stack(conformers)

    nc.setConformers(confs=conformers, types=list(spc))
    Ecmp = nc.energy()  # this generates AEVs

    aevs = np.empty([Ngen, len(rcatoms) * aevsize])
    for m in range(Ngen):
        for j, a in enumerate(rcatoms):
            aevs[m, j * aevsize:(j + 1) * aevsize] = nc.atomicenvironments(
                a, m).copy()

    dm = scispc.distance.pdist(aevs, 'sqeuclidean')
    picker = rdSimDivPickers.MaxMinPicker()
    seed_list = [i for i in range(Ngen)]
    np.random.shuffle(seed_list)
    print('seed:', seed_list)
    ids = list(picker.Pick(dm, Ngen, Nkep, firstPicks=list(seed_list[0:5])))
    ids.sort()
    print(f, len(ids), conformers.shape, dm.shape, ":", ids)

    hdn.writexyzfile(cdir + f.split('.')[0] + '.xyz', conformers[ids], spc)
of.close()
Exemple #13
0
    psi.append(i.split('_')[2].split('.')[0])

    data = hdt.readxyz2(dir + i)
    print(data[0])
    print(data[1])

    xyz.append(data[0])

    nc.setMolecule(coords=data[0][0], types=list(data[1]))

    Eact.append(nc.energy()[0])

xyz = np.vstack(xyz)
#print(xyz)
hdt.writexyzfile(
    '/home/jujuman/Dropbox/ChemSciencePaper.AER/JustinsDocuments/ACS_april_2017/DipeptidePhiPsi/data.xyz',
    xyz, list(data[1]))

data2 = hdt.readncdat(
    '/home/jujuman/Dropbox/ChemSciencePaper.AER/JustinsDocuments/ACS_april_2017/DipeptidePhiPsi/data.dat',
    type=np.float32)

Eact = np.array(Eact)
Edft = data2[2]

z = np.array(hdt.hatokcal * (Eact - Eact.min()),
             dtype=np.float32).reshape(x.shape[0], x.shape[0])
z2 = np.array(hdt.hatokcal * (Edft - Edft.min()),
              dtype=np.float32).reshape(x.shape[0], x.shape[0])

rmse = hdt.calculaterootmeansqrerror(z, z2)
Exemple #14
0
    def pDyn_QMsampling(self, pdynparams, gpuid):
        """Drive the pDynamo OPT -> TS -> IRC pipeline and sample IRC points.

        Steps, in order:

        1. Write the three pDynamo input files and run the OPT, TS and IRC
           subprocess commands; each later step only runs if the previous
           one returned 0.
        2. Convert every file in ``IRCdir`` to XYZ files in ``indir``.
        3. For every file in ``indir`` ask ``activ.check_stddev`` for its
           value; when it exceeds ``sig`` the modes from ``activ.get_nm``
           are kept.  ``activ.write_nm_xyz(XYZfile)`` is called after each
           file.
        4. Read ``XYZfile`` and, for every structure and every kept mode
           entry, generate 500 random structures with
           ``nmt.nmsgenerator_RXN`` and write them to ``self.cdir``.

        Args:
            pdynparams: Mapping providing the keys ``'pDyn_dir'``,
                ``'num_rxn'``, ``'logfile_OPT'``, ``'logfile_TS'``,
                ``'logfile_IRC'``, ``'sbproc_cmdOPT'``, ``'sbproc_cmdTS'``,
                ``'sbproc_cmdIRC'``, ``'IRCdir'``, ``'indir'``,
                ``'XYZfile'``, ``'l_val'``, ``'h_val'``, ``'n_points'``,
                ``'sig'``, ``'N'``, ``'wkdir'``, ``'cnstfilecv'``,
                ``'saefilecv'`` and ``'Nnt'``.
            gpuid: Forwarded to the ``aat.subproc_pDyn`` constructor.
        """
        activ = aat.subproc_pDyn(self.netdict['cnstfile'],
                                 self.netdict['saefile'],
                                 self.netdict['nnfprefix'],
                                 self.netdict['num_nets'], gpuid)

        pDyn_dir = pdynparams['pDyn_dir']  # folder for the pDynamo input files
        num_rxn = pdynparams['num_rxn']  # number of input reactions
        logfile_OPT = pdynparams['logfile_OPT']  # log for the OPT step
        logfile_TS = pdynparams['logfile_TS']  # log for the TS step
        logfile_IRC = pdynparams['logfile_IRC']  # log for the IRC step
        sbproc_cmdOPT = pdynparams['sbproc_cmdOPT']  # subprocess commands
        sbproc_cmdTS = pdynparams['sbproc_cmdTS']
        sbproc_cmdIRC = pdynparams['sbproc_cmdIRC']
        IRCdir = pdynparams['IRCdir']  # where pDynamo saved the IRC points
        indir = pdynparams['indir']  # where the per-point XYZ files are written
        XYZfile = pdynparams['XYZfile']  # XYZ file passed to write_nm_xyz
        l_val = pdynparams['l_val']  # passed to nmsgenerator_RXN with h_val
        h_val = pdynparams['h_val']
        n_points = pdynparams['n_points']  # passed to getIRCpoints_toXYZ
        sig = pdynparams['sig']
        # NOTE(review): this value is read but the sampling loop below always
        # generates 500 structures - confirm which one is intended.
        N = pdynparams['N']
        wkdir = pdynparams['wkdir']
        cnstfilecv = pdynparams['cnstfilecv']
        saefilecv = pdynparams['saefilecv']
        Nnt = pdynparams['Nnt']

        # --------------------------------- Run pDynamo ---------------------------
        activ.write_pDynOPT(num_rxn, pDyn_dir, wkdir, cnstfilecv, saefilecv,
                            Nnt)
        activ.write_pDynTS(num_rxn, pDyn_dir, wkdir, cnstfilecv, saefilecv,
                           Nnt)
        activ.write_pDynIRC(num_rxn, pDyn_dir, wkdir, cnstfilecv, saefilecv,
                            Nnt)

        # Each stage is only started when the previous one returned 0.
        chk_OPT = activ.subprocess_cmd(sbproc_cmdOPT, False, logfile_OPT)
        if chk_OPT == 0:
            chk_TS = activ.subprocess_cmd(sbproc_cmdTS, False, logfile_TS)
            if chk_TS == 0:
                activ.subprocess_cmd(sbproc_cmdIRC, False, logfile_IRC)

        # ----------------------- Save points along ANI IRC ------------------------
        for f in sorted(os.listdir(IRCdir)):
            activ.getIRCpoints_toXYZ(n_points, IRCdir + f, f, indir)

        # ------ Check for high standard deviation structures and get vib modes -----
        # Start with no modes so the sampling loop below is skipped (instead
        # of raising NameError) when no structure exceeds sig.
        nmc = []
        for f in sorted(os.listdir(indir)):
            stdev = activ.check_stddev(indir + f, sig)
            if stdev > sig:
                # NOTE(review): each hit replaces the modes of the previous
                # one; only the last file above sig is used below.
                nmc = activ.get_nm(indir + f)

            activ.write_nm_xyz(XYZfile)

        # ----------------------------- Read XYZ for NM -----------------------------
        X, spc, Na, C = hdt.readxyz3(XYZfile)

        # --------- NMS for XYZs with high stddev --------
        for i, xyz_i in enumerate(X):
            for nm_j in nmc:
                gen = nmt.nmsgenerator_RXN(xyz_i, nm_j, spc[i], l_val, h_val)

                N = 500
                gen_crd = np.zeros((N, len(spc[i]), 3), dtype=np.float32)
                for k in range(N):
                    gen_crd[k] = gen.get_random_structure()

                # NOTE(review): the file name only depends on N, so every
                # (structure, mode) combination writes to the same path.
                hdt.writexyzfile(self.cdir + 'nms_TS%i.xyz' % N, gen_crd,
                                 spc[i])

        del activ
Exemple #15
0
                           dtype=np.float32).reshape(xyz.shape[0], 3)

            #if conv:
            #    print('Failed to converge!!!')
            energies = np.zeros((Nnc), dtype=np.float64)

            #------------ CV NETWORKS 1 -----------
            N = 0
            for comp in nc1:
                comp.setMolecule(coords=xyz, types=list(spc))
                energies[N] = comp.energy()[0]
                N = N + 1

            if np.std(hdt.hatokcal * energies) > 5.0:
                hdt.writexyzfile(
                    '/home/jujuman/Research/CrossValidation/GDB-06-High-sdev/CV1bmol-'
                    + str(total_mol) + '.xyz',
                    xyz.reshape(1, xyz.shape[0], xyz.shape[1]), spc)
                total_bad = total_bad + 1

            perc = int(100.0 * total_bad / float(total_mol))
            output = '  ' + str(k) + ' ' + str(total_bad) + '/' + str(
                total_mol) + ' ' + str(perc) + '% (' + str(
                    Na) + ') : stps=' + str(stps) + ' : ' + str(
                        energies) + ' : std(kcal/mol)=' + str(
                            np.std(hdt.hatokcal *
                                   energies)) + ' : ' + Chem.MolToSmiles(m)

            if np.std(hdt.hatokcal * energies) > 5.0:
                print("CV1:", output)

            f1.write(output + '\n')
Exemple #16
0
    if not ('F' in Chem.MolToSmiles(m)):
        print(n,') Working on',l,Chem.MolToSmiles(m),'...')

        # Add hydrogens
        m = Chem.AddHs(m)

        # generate Nc conformers
        cids = AllChem.EmbedMultipleConfs(m, N, useRandomCoords=True)

        # Classical Optimization
        for cid in cids:
            _ = AllChem.MMFFOptimizeMolecule(m, confId=cid, maxIters=250)

        # Set mols
        activ.setrdkitmol(m,cids)

        # Generate conformations
        X = activ.generate_conformations(N, T, dt, 2000, 10, dS = 0.34)

        nfo = activ._infostr_
        difo.write('  -'+l+': '+nfo+'\n')
        print(nfo)
        difo.flush()

        # Set chemical symbols
        Xi, S = pya.__convert_rdkitmol_to_nparr__(m)

        # Store structures
        hdt.writexyzfile(sdir+l+'-'+str(n).zfill(2)+'.xyz', X, S)
difo.close()
Exemple #17
0
# Excerpt of a longer script: d, c, wkdir1, cnstfile and saefile are defined
# before this point, and the loop body continues past the end of the excerpt.

# Number of networks to construct.
Nnc = 2

# Sorted listing of the files in directory d.
files = os.listdir(d)
files.sort()
#print(files)
# Construct pyNeuroChem classes, one per 'train<l>/networks/' directory
nc1 =  [pync.conformers(wkdir1 + cnstfile, wkdir1 + saefile, wkdir1 + 'train' + str(l) + '/networks/', 0, False) for l in range(Nnc)]

comp_xyz = []

for f in files:
    #print(f)
    # Read the IRC file and save its geometries as '<stem>.xyz' under c.
    Eact, xyz, spc, Rc = pyg.read_irc(d+f)
    # Text between 'IRC' and the next '.' in the file name (not used within
    # this excerpt).
    s_idx = f.split('IRC')[1].split('.')[0]
    hdt.writexyzfile(c+f.split('.')[0]+'.xyz',xyz,spc)
    #print(f.split('IRC')[1].split('.')[0],Rc.shape)
    # Only paths whose Rc holds more than 10 values are evaluated.
    if Rc.size > 10:
        #------------ CV NETWORKS 1 -----------
        # Energy of every geometry from each network, scaled by hdt.hatokcal.
        energies = []
        N = 0
        for comp in nc1:
            comp.setConformers(confs=xyz, types=list(spc))
            energies.append(hdt.hatokcal*comp.energy())
            N = N + 1

        # One row per network; the spread across rows is divided by the
        # number of entries in spc.
        energies = np.vstack(energies)
        modl_std = np.std(energies[::-1],axis=0) / float(len(spc))

        bad_cnt = 0
        bad_xyz = []
Exemple #18
0
        nms = nmt.nmsgenerator(xyz,nmc,frc,spc,T,minfc=5.0E-2)
        conformers = nms.get_Nrandom_structures(Ngen)

        ids = dc.get_divconfs_ids(conformers, spc, Ngen, Nkep, atmlist)
        conformers = conformers[ids]
        print('    -',f,len(ids),conformers.shape)

        sigma = anicv.compute_stddev_conformations(conformers,spc)
        #print(sigma)
        sid = np.where( sigma >  0.0 )[0]
        print('  -', fi, 'of', len(files), ') File:', f, 'keep:', sid.size,'percent:',"{:.2f}".format(100.0*sid.size/Ngen))


        Nt += sigma.size
        Nk += sid.size
        if 100.0*sid.size/float(Ngen) > 0:
            Nkp += sid.size
            cfn = f.split('.')[0].split('-')[0]+'_'+str(idx).zfill(5)+'-'+f.split('.')[0].split('-')[1]+'.xyz'
            hdn.writexyzfile(cdir+cfn,conformers[sid],spc)
        idx += 1

    Nkt += Nk
    Ntt += Nt
    of.write('    -Total: '+str(Nk)+' of '+str(Nt)+' percent: '+"{:.2f}".format(100.0*Nk/Nt)+'\n')
    of.flush()
    print('    -Total:',Nk,'of',Nt,'percent:',"{:.2f}".format(100.0*Nk/Nt))

of.write('\nGrand Total: '+ str(Nkt)+ ' of '+ str(Ntt)+' percent: '+"{:.2f}".format(100.0*Nkt/Ntt)+ ' Kept '+str(Nkp)+'\n')
print('\nGrand Total:', Nkt, 'of', Ntt,'percent:',"{:.2f}".format(100.0*Nkt/Ntt), 'Kept',Nkp)
of.close()
Exemple #19
0
# Excerpt of a longer script: n and wkdir are defined before this point.

# Path of the IRC log for reaction number n.
dtdir = '/home/jujuman/Dropbox/IRC_DBondMig/Benzene_rxn2/rxn_ben'+str(n)+'/IRC.log'

#en, cd, ty  = pyg.get_irc_data(dtdir+'IRC_fwd.log',dtdir+'IRC_bck.log',dtdir+'saddle_ts.log')
en, cd, ty, Rc = pyg.read_irc(dtdir)

# Store the fourth value returned by read_irc under the key 'x'.
np.savez(wkdir+'reaction_coordinate.npz',x=Rc)

# Na: number of entries in ty; Nc: number of entries in en.
Na = len(ty)
Nc = en.shape[0]

print (cd.shape,' ',en.shape)

#for i,x in enumerate(cd):
    #hdt.write_rcdb_input(x,ty,i,wkdir,fpf,5,'wb97x/6-31g*','600.0',opt='0')

hdt.writexyzfile(wkdir+'irc.xyz',cd,ty)

# Write irc.dat: a 'comment' line, the value of Nc, then Na followed by the
# comma-separated entries of ty, then one line per entry of cd.
# NOTE(review): the file is not closed within this excerpt.
f = open(wkdir+'irc.dat','w')
f.write("comment\n")
f.write(str(Nc)+'\n')
f.write(str(Na)+',')
for j in ty: f.write(j+',')
f.write('\n')
mol = 0
for l,i in enumerate(cd):

    # Every value of every row of this entry, comma separated on one line ...
    for j in i:
        for k in j:
            f.write(str(k)+',')
        # Writes an empty string, so nothing separates consecutive rows.
        f.write('')
    # ... followed by the matching entry of en and a line break.
    f.write(str(en[l]) + ',' + '\n')
import hdnntools as hdn
import pyNeuroChem as pync
import matplotlib.pyplot as plt

# HDF5 data set to load (pyt is not imported within this excerpt).
file = "/home/jujuman/Research/ANI-DATASET/rxn_db_mig.h5"

al = pyt.anidataloader(file)

# Load everything, fetch it as one object and release the loader.
al.totalload()
data = al.get_all_data()
al.cleanup()

# data[1] is flattened and scaled by hdn.hatokcal; data[0] is reshaped to
# (n_values, n_types, 3), where n_types is the length of data[2][0].
# NOTE(review): presumably data = (coordinates, energies, species) - confirm
# against anidataloader.get_all_data.
df_E = hdn.hatokcal * data[1].flatten()
xyz = data[0].reshape(df_E.shape[0], len(data[2][0]), 3)

hdn.writexyzfile('/home/jujuman/crds.xyz', xyz, data[2][0])

# Set required files for pyNeuroChem
#wkdir    = '/home/jujuman/Dropbox/ChemSciencePaper.AER/ANI-c08e-ntwk/'
wkdir = '/home/jujuman/Research/GDB-11-wB97X-6-31gd/ANI-c08e-ntwk_newtrain/'
cnstfile = wkdir + 'rHCNO-4.6A_16-3.1A_a4-8.params'
saefile = wkdir + 'sae_6-31gd.dat'
nnfdir = wkdir + 'networks/'

# Construct pyNeuroChem class
mol = pync.conformers(cnstfile, saefile, nnfdir, 0)

# Load all structures with the first species list as atom types.
mol.setConformers(confs=xyz, types=list(data[2][0]))

# Network energies for all loaded structures, scaled by hdn.hatokcal.
E = hdn.hatokcal * mol.energy()
        #Nd += len(X)
        #Nt += Nu
        Nt += 1
        if len(X) > 0:
            Nd += 1
            X = np.stack(X)
            x = X[0].reshape(1, X.shape[1], 3)  # keep only the first guy

        print('    -kept', len(x), 'of', Nu)

        if len(X) > 0:
            S = gdb.get_symbols_rdkitmol(m)

        hdn.write_rcdb_input(x[0],
                             S,
                             int(id),
                             wdir,
                             fpf,
                             100,
                             LOT,
                             '500.0',
                             fill=4,
                             comment='smiles: ' + Chem.MolToSmiles(m) +
                             ' GDB-ID: ' + str(ridx))
        hdn.writexyzfile(wdir + fpf + '-' + str(id).zfill(4) + '.xyz',
                         x.reshape(1, x.shape[1], x.shape[2]), S)
        #print(str(id).zfill(8))

molnfo.close()
print('Total mols:', Nd, 'of', Nt, 'percent:',
      "{:.2f}".format(100.0 * Nd / float(Nt)))
# Excerpt of a longer script: spc, xyz, nc and the module alias nm are defined
# before this point.

# Build the molecule with an ANI calculator, optimize it with LBFGS and run a
# vibrational analysis.
mol = Atoms(spc, xyz, calculator=ANI(False))
mol.calc.setnc(nc)
LBFGS(mol).run(fmax=0.00001)
vib = Vibrations(mol)
vib.run()
vib.summary()

# Print the coordinates before and after replacing them with the optimized
# positions.
print(xyz)
xyz = mol.get_positions().copy()
print(xyz)

# Drop the first six modes.
# NOTE(review): presumably the translational/rotational modes - confirm.
nm_cr = vib.modes[6:]

# Reshape the remaining modes to (3 * n_atoms - 6, n_atoms, 3).
Nf = 3 * len(spc) - 6
nmo = nm_cr.reshape(Nf, len(spc), 3)

# Nine hard-coded values passed to the generator as its third argument.
# NOTE(review): nine values match Nf only for a five-atom molecule - confirm
# they belong to the molecule being sampled.
fcc = np.array([
    1.314580, 1.3147106, 1.3149728, 1.5161799, 1.5164505, 5.6583018, 6.7181139,
    6.7187967, 6.7193842
])

gen = nm.nmsgenerator(xyz, nmo, fcc, spc, 2000.0)

# Draw N random structures from the generator and write them to one XYZ file.
N = 2000
gen_crd = np.zeros((N, len(spc), 3), dtype=np.float32)
for i in range(N):
    gen_crd[i] = gen.get_random_structure()

hdt.writexyzfile('pynmstesting.xyz', gen_crd, spc)
Exemple #23
0
# Directory holding the forward/backward IRC logs; earlier choices are kept
# commented out.
#dir = '/home/jujuman/Research/GDB-11-wB97X-6-31gd/dnnts_rxns/scans_double_bond_migration/'
#dir = '/home/jujuman/Dropbox/IRC_DBondMig/rxn1/'
dir = '/home/jujuman/Dropbox/IRC_DBondMig/rxn5/'

filef, fileb = dir + 'IRC_fwd.log', dir + 'IRC_bck.log'

# Element 0 of each result is used as energies, element 1 as geometries and
# element 2 as the atom types.
dataf, datab = g09.read_irc(filef), g09.read_irc(fileb)

# The backward branch is flipped before the forward branch is appended.
bck_xyz_flipped = np.flipud(datab[1])
xyz = np.concatenate([bck_xyz_flipped, dataf[1]])

bck_eng_flipped = np.flipud(datab[0])
eng = hdn.hatokcal * np.concatenate([bck_eng_flipped, dataf[0]])

print(xyz.shape)

hdn.writexyzfile(dir + 'scan.xyz', xyz, dataf[2])

# Files required by pyNeuroChem, all located below anipath.
anipath = '/home/jujuman/Dropbox/ChemSciencePaper.AER/ANI-c08e-ntwk'
cnstfile = anipath + '/rHCNO-4.6A_16-3.1A_a4-8.params'
saefile = anipath + '/sae_6-31gd.dat'
nnfdir = anipath + '/networks/'

# Build the pyNeuroChem conformer object and load the joined path into it.
nc = pync.conformers(cnstfile, saefile, nnfdir, 0)
nc.setConformers(confs=xyz, types=dataf[2])
Exemple #24
0
    ftme_t = 0.0
    for n, m in enumerate(files[0:int(id[0] * len(files))]):
        data = hdn.read_rcdb_coordsandnm(id[1] + m)
        S = data["species"]
        print(n, ') Working on', m, '...')

        # Set mols
        activ.setmol(data["coordinates"], S)

        # Generate conformations
        X = activ.generate_conformations(N, T1, T2, dt, Nc, Ns, dS=0.25)

        ftme += activ.failtime
        ftme_t += activ.failtime

        nfo = activ._infostr_
        difo.write('  -' + m + ': ' + nfo + '\n')
        difo.flush()
        print(nfo)

        if X.size > 0:
            hdn.writexyzfile(
                dstore + 'mds_' + m.split('.')[0] + '_' + str(di).zfill(4) +
                '.xyz', X, S)
    difo.write('Class mean fail time: ' +
               "{:.2f}".format(ftme_t / float(len(files))) + '\n')
difo.write('Complete mean fail time: ' + "{:.2f}".format(ftme / float(Nmol)) +
           '\n')
print(Nmol)
difo.close()
Exemple #25
0
    def normal_mode_sampling(self, T, Ngen, Nkep, sig, gpuid):
        """Generate normal-mode-sampled conformers and keep the uncertain ones.

        Every file in every directory listed in ``self.idir`` is read with
        ``hdt.read_rcdb_coordsandnm``. ``Ngen`` random structures are drawn
        from ``nmt.nmsgenerator``, narrowed with ``get_divconfs_ids`` and then
        filtered to those whose ``compute_stddev_conformations`` value is
        strictly greater than ``sig``. The survivors are written as one xyz
        file per input file into ``self.cdir``; per-directory and overall
        counts are logged to ``<ldtdir><datdir>/info_data_nms.nfo``.

        Args:
            T: Forwarded to ``nmt.nmsgenerator`` as its fifth argument
                (presumably the sampling temperature -- confirm).
            Ngen: Number of random structures requested per input file.
            Nkep: Forwarded to ``get_divconfs_ids`` (presumably the number of
                diverse conformers to retain -- confirm).
            sig: Threshold applied to the per-conformer values returned by
                ``compute_stddev_conformations``.
            gpuid: Forwarded to both ``aat`` helper constructors.

        Notes:
            Entries of ``self.idir`` are concatenated directly with file
            names, so they must end with a path separator. Output names are
            built from the first two ``-``-separated parts of the input
            file's stem.
        """
        nfo_path = self.ldtdir + self.datdir + '/info_data_nms.nfo'

        # The context manager closes the log even when a helper raises.
        with open(nfo_path, 'w') as of:
            aevsize = self.netdict['aevsize']

            anicv = aat.anicrossvalidationconformer(self.netdict['cnstfile'],
                                                    self.netdict['saefile'],
                                                    self.netdict['nnfprefix'],
                                                    self.netdict['num_nets'],
                                                    gpuid, False)

            dc = aat.diverseconformers(
                self.netdict['cnstfile'], self.netdict['saefile'],
                self.netdict['nnfprefix'] + '0/networks/', aevsize, gpuid,
                False)

            Nkp = 0  # conformers written to disk (reported as "Kept")
            Nkt = 0  # conformers above `sig`, summed over all directories
            Ntt = 0  # conformers evaluated, summed over all directories
            idx = 0  # running input-file counter embedded in output names
            for di, idir in enumerate(self.idir):
                of.write(
                    str(di) + ' of ' + str(len(self.idir)) + ') dir: ' +
                    str(idir) + '\n')

                Nk = 0
                Nt = 0
                for f in sorted(os.listdir(idir)):
                    data = hdt.read_rcdb_coordsandnm(idir + f)

                    spc = data["species"]
                    xyz = data["coordinates"]
                    nmc = data["nmdisplacements"]
                    frc = data["forceconstant"]

                    nms = nmt.nmsgenerator(xyz, nmc, frc, spc, T, minfc=5.0E-2)
                    conformers = nms.get_Nrandom_structures(Ngen)

                    ids = dc.get_divconfs_ids(conformers, spc, Ngen, Nkep, [])
                    conformers = conformers[ids]

                    sigma = anicv.compute_stddev_conformations(conformers, spc)
                    sid = np.where(sigma > sig)[0]

                    Nt += sigma.size
                    Nk += sid.size
                    # Only write a file when at least one conformer passed.
                    if sid.size > 0:
                        Nkp += sid.size
                        stem = f.split('.')[0].split('-')
                        cfn = (stem[0] + '_' + str(idx).zfill(5) + '-' +
                               stem[1] + '_2.xyz')
                        hdt.writexyzfile(self.cdir + cfn, conformers[sid], spc)
                    idx += 1

                Nkt += Nk
                Ntt += Nt
                # An empty directory yields Nt == 0; report 0.00 instead of
                # raising ZeroDivisionError.
                pct = 100.0 * Nk / Nt if Nt else 0.0
                of.write('    -Total: ' + str(Nk) + ' of ' + str(Nt) +
                         ' percent: ' + "{:.2f}".format(pct) + '\n')
                of.flush()

            # Drop the network helpers before the final summary is written.
            del anicv
            del dc

            # Same guard for the case where nothing was evaluated at all.
            pct_total = 100.0 * Nkt / Ntt if Ntt else 0.0
            of.write('\nGrand Total: ' + str(Nkt) + ' of ' + str(Ntt) +
                     ' percent: ' + "{:.2f}".format(pct_total) + ' Kept ' +
                     str(Nkp) + '\n')
Exemple #26
0
    def __fragmentbox__(self, file, sighat):
        """Cut molecular clusters out of the box and keep the uncertain ones.

        For every molecule ``i`` whose centroid lies far enough inside the
        box, a cluster is built from that molecule plus up to a randomly
        chosen number (2-14) of neighbouring molecules whose closest
        atom-atom distance to molecule ``i`` is between 0.70 and 4.5. Every
        network in ``self.aens.ncl`` evaluates the centred cluster; the
        standard deviation of those energies (scaled by ``hdn.hatokcal``),
        divided by the square root of the cluster's atom count, is compared
        with ``sighat``.

        Clusters above the threshold are written to
        ``file + <molecule index, 4 digits> + '.xyz'`` and appended to
        ``self.frag_list``. ``self.Nt`` counts evaluated clusters,
        ``self.Nd`` the ones kept, and ``self.maxsig`` tracks the largest
        value seen among the kept clusters.

        Args:
            file: Path prefix for the xyz files that are written.
            sighat: Threshold on the per-sqrt(atom) standard deviation.

        Notes:
            ``self.ctd[k][2]`` is used as the first-atom offset of molecule
            ``k`` in ``self.X`` and ``self.ctd[k][1]`` as its margin from the
            box faces; ``self.L`` is the upper bound for centroid
            coordinates (presumably the cubic box length -- confirm).
        """
        self.X = self.mol.get_positions()

        self.frag_list = []

        self.Nd = 0
        self.Nt = 0

        self.maxsig = 0

        n_mols = len(self.Na)
        for i in range(n_mols):
            si = self.ctd[i][2]
            di = self.ctd[i][1]
            Nai = self.Na[i]
            Xi = self.X[si:si + Nai, :]
            Xci = np.sum(Xi, axis=0) / Nai

            # The centroid must sit at least 4.5 and at least `di` away from
            # the box faces.
            if not (np.all(Xci > 4.5) and np.all(Xci <= self.L - 4.5)):
                continue
            if not (np.all(Xci > di) and np.all(Xci < self.L - di)):
                continue

            Xf = Xi
            # list() guarantees a private copy that extend() may grow.
            Sf = list(self.S[si:si + Nai])

            Nmax = random.randint(2, 14)
            Nmol = 0
            for j in range(n_mols):
                if i == j:
                    continue

                sj = self.ctd[j][2]
                dj = self.ctd[j][1]
                Naj = self.Na[j]
                Xj = self.X[sj:sj + Naj, :]
                Xcj = np.sum(Xj, axis=0) / Naj

                if np.all(Xcj > dj) and np.all(Xcj < self.L - dj):
                    # Cheap centroid pre-filter before the pairwise search.
                    if np.linalg.norm(Xci - Xcj) < di + dj + 5.0:
                        # Closest atom-atom distance, capped at 10.0 exactly
                        # like the original running minimum.
                        pair_d = np.linalg.norm(
                            Xi[:, np.newaxis, :] - Xj[np.newaxis, :, :],
                            axis=2)
                        dmin = pair_d.min(initial=10.0)

                        if 0.70 < dmin < 4.5:
                            Xf = np.vstack([Xf, Xj])
                            Sf.extend(self.S[sj:sj + Naj])
                            Nmol += 1
                if Nmol >= Nmax:
                    break

            # Centre the cluster on its geometric centroid.
            Xf = Xf - np.sum(Xf, axis=0) / float(len(Sf))

            # One energy per ensemble member, whatever the ensemble size.
            coords = np.array(Xf, dtype=np.float32)
            energies = []
            for nc in self.aens.ncl:
                nc.setMolecule(coords=coords, types=Sf)
                energies.append(nc.energy()[0])
            E = np.array(energies, dtype=np.float64)

            # Normalise by the atoms actually in the cluster. The previous
            # `Nai + Naj` used whichever molecule the inner loop visited
            # last and was undefined for a single-molecule box.
            sig = np.std(hdn.hatokcal * E) / np.sqrt(len(Sf))

            self.Nt += 1
            if sig > sighat:
                if sig > self.maxsig:
                    self.maxsig = sig
                self.Nd += 1
                hdn.writexyzfile(file + str(i).zfill(4) + '.xyz',
                                 Xf.reshape(1, Xf.shape[0], 3), Sf)
                self.frag_list.append({'coords': Xf, 'spec': Sf})
Exemple #27
0
            if sigma > 0.1:
                Nb = Nb + 1
                if key not in data.keys():
                    data[key] = (spc_l,[xyz_l])
                else:
                    #print(type())
                    p_xyz = data[key][1]
                    p_xyz.append(xyz_l)
                    data[key] = (spc_l, p_xyz)

                '''
                print('H2O_frag' + str(i).zfill(3) + '.xyz : O=' + str(No).zfill(3) + ' : H=' + str(Nh).zfill(3) + ' : '
                            + "{:13.3f}".format(energies[0]) +
                        ' ' + "{:13.3f}".format(energies[1]) +
                        ' ' + "{:13.3f}".format(energies[2]) +
                        ' ' + "{:13.3f}".format(energies[3]) +
                        ' ' + "{:13.3f}".format(energies[4]) +
                        ' ' + "{:.3f}".format(sigma))
                '''
        else:
            Ne = Ne + 1
            print('Warning: fragmentation failed! No:',No,'Nh:',Nh)
    print('Bad Frags:', Nb, 'of',Nf,'Error:',Ne,'Estd:',Etot/float(Nf), ' Avg. Frc. Std.: ', Ftot/float(Nf))
    dyn.run(40)  # Do 100 steps of MD

# Write one xyz file per collected fragment key; each entry stores the
# species at index 0 and the list of coordinate sets at index 1.
for key, entry in data.items():
    spc = entry[0]
    xyz = np.array(entry[1])
    print(key, xyz.shape[0])
    hdt.writexyzfile(stdir + 'frag' + key + '.xyz', xyz, spc)