Example #1
0
def pyNCcomputeTestSet(cnstfile1,
                       saefile1,
                       nnfdir1,
                       dtdir,
                       dtdftpref,
                       dtpm6dir,
                       dtpm6pref,
                       N,
                       P=1.0):
    """Evaluate a NeuroChem network on a numbered series of test files.

    For every index ``i`` in ``range(N)`` the file
    ``dtdir + dtdftpref + str(i) + '_test.dat'`` is considered. It is used
    only when it exists on disk and a Bernoulli draw with probability ``P``
    succeeds. The matching file
    ``dtpm6dir + dtpm6pref + str(i) + '_test.dat'`` is read alongside it, and
    the pair is silently dropped when the two energy lists differ in length.

    Args:
        cnstfile1, saefile1, nnfdir1: forwarded to ``pync.pyNeuroChem``.
        dtdir, dtdftpref: directory and file-name prefix of the reference data.
        dtpm6dir, dtpm6pref: directory and file-name prefix of the second
            data set.
        N: number of file indices to try.
        P: probability passed to ``np.random.binomial(1, P)``.

    Returns:
        ``(Eact, Ecmp, Eotr, Ndat, Nmol, t)``: reference, network and
        second-set energies as float arrays (each file's values passed
        through ``shiftlsttomin`` before being appended), the number of
        energies read, the number of files used, and the summed
        ``computeEnergies`` wall time multiplied by 1000.
    """
    network = pync.pyNeuroChem(cnstfile1, saefile1, nnfdir1, 0)

    reference, predicted, other = [], [], []
    n_points = 0
    n_files = 0
    elapsed_ms = 0.0

    for idx in range(N):
        print('|----- File {} -----|'.format(idx))
        tail = str(idx) + '_test.dat'
        ref_path = dtdir + dtdftpref + tail
        print('Name: ' + ref_path)

        # The draw happens on every iteration, whether or not the file exists.
        selected = bool(np.random.binomial(1, P))
        if not (os.path.isfile(ref_path) and selected):
            continue

        xyz, typ, ref_e = gt.readncdat(ref_path)
        xyz1, typ1, oth_e = gt.readncdat(dtpm6dir + dtpm6pref + tail)

        # Only files whose two energy lists line up are usable.
        if len(ref_e) != len(oth_e):
            continue

        reference.extend(shiftlsttomin(ref_e))
        other.extend(shiftlsttomin(oth_e))

        n_files += 1
        n_points += len(ref_e)

        # Hand the conformers to NeuroChem and time the energy evaluation.
        network.setConformers(confs=xyz, types=typ)

        print('Computing energies 1...')
        started = tm.time()
        net_e = network.computeEnergies()
        step_ms = (tm.time() - started) * 1000.0
        elapsed_ms += step_ms
        print('Computation complete 1. Time: {:.4f}ms'.format(step_ms))

        predicted.extend(shiftlsttomin(net_e))

    return (np.array(reference, dtype=float),
            np.array(predicted, dtype=float),
            np.array(other, dtype=float),
            n_points,
            n_files,
            elapsed_ms)
def pyNCcomputeTestSet(cnstfile1,saefile1,nnfdir1,dtdir,dtdftpref,dtpm6dir,dtpm6pref,N,P=1.0):
    """Evaluate a NeuroChem network on a numbered series of test files.

    For each ``i`` in ``range(N)`` the reference file
    ``dtdir + dtdftpref + str(i) + '_test.dat'`` is used when it exists and a
    Bernoulli draw with probability ``P`` succeeds. The companion file
    ``dtpm6dir + dtpm6pref + str(i) + '_test.dat'`` is read alongside it.
    Pairs whose energy lists differ in length are reported and skipped.

    Args:
        cnstfile1, saefile1, nnfdir1: forwarded to ``pync.pyNeuroChem``.
        dtdir, dtdftpref: directory and file-name prefix of the reference data.
        dtpm6dir, dtpm6pref: directory and file-name prefix of the second
            data set.
        N: number of file indices to try.
        P: probability passed to ``np.random.binomial(1, P)``.

    Returns:
        ``(Eact, Ecmp, Eotr, Ndat, Nmol, t)``: reference, network and
        second-set energies as float arrays (each file's values passed
        through ``shiftlsttomin``), the number of energies read, the number
        of files used, and the summed ``computeEnergies`` wall time
        multiplied by 1000.
    """
    # Construct pyNeuroChem classes
    nc = pync.pyNeuroChem(cnstfile1,saefile1,nnfdir1,0)

    Eact = []
    Ecmp = []
    Eotr = []
    Ndat = 0
    Nmol = 0
    t = 0.0
    for i in range(0,N):
        dftfile = dtdir + dtdftpref + str(i) + '_test.dat'

        # Draw once per index, before any file check, so the random stream
        # is consumed exactly as it always was.
        rv = bool(np.random.binomial(1,P))

        if not os.path.isfile(dftfile):
            print('File not found: ' + dftfile)
            continue

        if not rv:
            # The file exists but was excluded by the random draw; reporting
            # it as "not found" would be misleading.
            print('File skipped by sampling: ' + dftfile)
            continue

        xyz,typ,Eact_t,chk    = gt.readncdat(dftfile)
        # NOTE(review): the existence of the companion file is not checked
        # here; confirm how gt.readncdat behaves when it is missing.
        xyz1,typ1,Eotr_t,chk  = gt.readncdat(dtpm6dir + dtpm6pref + str(i) + '_test.dat')

        if len(Eact_t) != len(Eotr_t):
            print (str(len(Eact_t)) + '!=' + str(len(Eotr_t)) + ' File: ' + dftfile)
            continue

        Eact += shiftlsttomin( Eact_t )
        Eotr += shiftlsttomin( Eotr_t )

        Nmol += 1
        Ndat += len( Eact_t )

        # Set the conformers in NeuroChem
        nc.setConformers(confs=xyz,types=typ)

        # Compute energies of the conformations and time the call
        print(' ' + str(Nmol) + ') Computing ' + str(len( Eact_t )) + ' energies...')
        _t1b = tm.time()
        Ecmp_t = nc.computeEnergies()
        _t2b = (tm.time() - _t1b) * 1000.0
        t += _t2b
        Ecmp += shiftlsttomin(  Ecmp_t )

    Eact = np.array(Eact,dtype=float)
    Eotr = np.array(Eotr,dtype=float)
    Ecmp = np.array(Ecmp,dtype=float)

    return Eact,Ecmp,Eotr,Ndat,Nmol,t
def read_data_files_convert_cm(file, N):
    """Read one data file and build its matrix representation.

    Args:
        file: path handed to ``gt.readncdat`` (read with ``np.float32``).
        N: size argument forwarded to ``cm.GenCMatData2``
            (presumably the padded matrix dimension; confirm against ``cm``).

    Returns:
        ``(cmat, Eact_tr, ise)``: the output of ``cm.GenCMatData2``, the
        energies from the file with ``ise`` subtracted, and ``ise`` itself
        (the value returned by ``cm.computerISE`` for the file's atom types).
    """
    # The fourth value returned by readncdat is not used here.
    xyz_tr, typ_tr, Eact_tr, _ = gt.readncdat(file, np.float32)

    # Compute energy of atoms at infinite separation and remove it from the
    # energies read from the file.
    ise = cm.computerISE(typ_tr)
    Eact_tr = Eact_tr - ise

    cmat = cm.GenCMatData2(xyz_tr, typ_tr, N)

    return cmat, Eact_tr, ise
# Apply the font settings (defined earlier) to all matplotlib text.
plt.rc('font', **font)

# Directory holding the network and its parameter files.
wkdir = '/home/jujuman/Research/GDB-11-wB97X-6-31gd/train_08_5/'

# Inputs required by pyNeuroChem, all located under wkdir.
cnstfile = '{}rHCNO-4.7A_32-3.2A_a8-8.params'.format(wkdir)
saefile = '{}sae_6-31gd.dat'.format(wkdir)
nnfdir = '{}networks/'.format(wkdir)

# Build the NeuroChem interface.
nc = pync.pyNeuroChem(cnstfile, saefile, nnfdir, 0)

# Load the DFT and DFTB retinol conformer files and keep their energies as
# numpy arrays.
xyz, typ, Eact = gt.readncdat('/home/jujuman/Dropbox/ChemSciencePaper.AER/TestCases/Retinol/data/retinolconformer_DFT.dat')
xyz1, typ1, Eact1 = gt.readncdat('/home/jujuman/Dropbox/ChemSciencePaper.AER/TestCases/Retinol/data/retinolconformer_DFTB.dat')
Eact, Eact1 = np.array(Eact), np.array(Eact1)

# Only the DFT geometries are handed to NeuroChem.
nc.setConformers(confs=xyz, types=typ)

# Report what NeuroChem loaded.
print('Number of Atoms Loaded: {}'.format(nc.getNumAtoms()))
print('Number of Confs Loaded: {}'.format(nc.getNumConfs()))
def produce_scan(ax,title,xlabel,cnstfile,saefile,nnfdir,dtdir,dt1,dt2,dt3,smin,smax,iscale,ishift):
    """Plot relative energies along a scan for a network and three data files.

    Energies are read from ``dtdir + dt1``, ``dtdir + dt2`` and
    ``dtdir + dt3``. The geometries of the first file are evaluated with a
    NeuroChem network. All four energy series are multiplied by
    ``gt.hatokcal``, sliced to ``[smin:smax]``, shifted so each minimum is
    zero, and drawn on ``ax``. The curves are labelled 'DFT', 'ANI-1',
    'DFTB' and 'PM6'; the last three carry their RMSE against the first.

    Args:
        ax: matplotlib axes to draw on.
        title: axes title.
        xlabel: x-axis label.
        cnstfile, saefile, nnfdir: forwarded to ``pync.pyNeuroChem``.
        dtdir: directory prefix for the three data files.
        dt1, dt2, dt3: file names of the reference and the two comparison
            data sets.
        smin, smax: slice bounds applied to every series.
        iscale, ishift: the x value of point ``k`` is ``k * iscale + ishift``.

    Returns:
        None. The function prints timing and Spearman correlations and
        modifies ``ax`` in place.
    """
    # Only the first file's geometries are needed; the other two files
    # contribute energies only.
    xyz, typ, Eact, _ = gt.readncdat(dtdir + dt1,np.float32)
    _, _, Eact2, _ = gt.readncdat(dtdir + dt2)
    _, _, Eact3, _ = gt.readncdat(dtdir + dt3)

    # Construct pyNeuroChem classes
    nc1 = pync.pyNeuroChem(cnstfile, saefile, nnfdir, 1)

    # Set the conformers in NeuroChem
    nc1.setConformers(confs=xyz, types=typ)

    # Print some data from the NeuroChem
    print('1) Number of Atoms Loaded: ' + str(nc1.getNumAtoms()))
    print('1) Number of Confs Loaded: ' + str(nc1.getNumConfs()))

    # Compute energies of the conformations and report the wall time
    print('Computing energies 1...')
    _t1b = tm.time()
    Ecmp1 = nc1.energy()
    print('Computation complete 1. Time: ' + "{:.4f}".format((tm.time() - _t1b) * 1000.0) + 'ms')

    n = smin
    m = smax

    # Unit conversion via gt.hatokcal (the axis label below states kcal/mol).
    Ecmp1 = gt.hatokcal * Ecmp1
    Eact  = gt.hatokcal * Eact
    Eact2 = gt.hatokcal * Eact2
    Eact3 = gt.hatokcal * Eact3

    # x coordinate of every scan point, built before slicing so indices
    # keep their original positions.
    IDX = np.arange(0, Eact.shape[0], 1, dtype=float) * iscale + ishift

    IDX = IDX[n:m]
    Eact = Eact[n:m]
    Eact2 = Eact2[n:m]
    Eact3 = Eact3[n:m]
    Ecmp1 = Ecmp1[n:m]

    # Shift each series so its minimum within the slice is zero.
    Ecmp1 = Ecmp1 - Ecmp1.min()
    Eact  = Eact  - Eact.min()
    Eact2 = Eact2 - Eact2.min()
    Eact3 = Eact3 - Eact3.min()

    rmse1 = gt.calculaterootmeansqrerror(Eact, Ecmp1)
    rmse3 = gt.calculaterootmeansqrerror(Eact, Eact2)
    rmse4 = gt.calculaterootmeansqrerror(Eact, Eact3)

    print("Spearman corr. 1: " + "{:.3f}".format(st.spearmanr(Ecmp1, Eact)[0]))
    print("Spearman corr. 2: " + "{:.3f}".format(st.spearmanr(Eact2, Eact)[0]))
    print("Spearman corr. 3: " + "{:.3f}".format(st.spearmanr(Eact3, Eact)[0]))

    # float('%.3g' % x) rounds the RMSE to three significant digits for the
    # legend text.
    ax.plot(IDX, Eact, '-', marker=r'o', color='black', label='DFT',
             linewidth=2, markersize=7)
    ax.plot(IDX, Ecmp1, ':', marker=r'D', color='red', label='ANI-1 RMSE: ' + '%s' % float('%.3g' % rmse1) + ' kcal/mol',
             linewidth=2, markersize=5)
    ax.plot(IDX, Eact2, ':', marker=r'v', color='blue', label='DFTB  RMSE: ' + '%s' % float('%.3g' % rmse3) + ' kcal/mol',
             linewidth=2, markersize=5)
    ax.plot(IDX, Eact3, ':', marker=r'*', color='orange', label='PM6   RMSE: ' + '%s' % float('%.3g' % rmse4) + ' kcal/mol',
             linewidth=2, markersize=7)

    th = ax.set_title(title,fontsize=16)
    th.set_position([0.5,1.005])

    # Set Limits
    ax.set_xlim([ IDX.min(),IDX.max()])
    ax.set_ylim([Eact.min()-1.0,Eact.max()+1.0])

    # Raw string: '\D' is not a valid escape sequence in a normal literal.
    ax.set_ylabel(r'$\Delta$E calculated (kcal/mol)')
    ax.set_xlabel(xlabel)
    ax.legend(bbox_to_anchor=(0.2, 0.98), loc=2, borderaxespad=0., fontsize=14)
import numpy as np

from scipy import stats as st

# Directory holding the network and its parameter files.
wkdir = '/home/jujuman/Research/GDB-11-wB97X-6-31gd/train_07/'

# Inputs required by pyNeuroChem, all located under wkdir.
cnstfile = '{}rHCNO-4.5A_32-3.1A_a8-8.params'.format(wkdir)
saefile = '{}sae_6-31gd.dat'.format(wkdir)
nnfdir = '{}networks/'.format(wkdir)

# Build the NeuroChem interface.
nc = pync.pyNeuroChem(cnstfile, saefile, nnfdir, 0)

# Load the atomoxetine conformer file and keep its energies as an array.
xyz, typ, Eact = gt.readncdat('/home/jujuman/Dropbox/Research/ChemSciencePaper/TestCases/Atomoxetine/Atomoxetine_conformersC_test.dat')
Eact = np.array(Eact)

# Hand the conformers to NeuroChem and report what it loaded.
nc.setConformers(confs=xyz, types=typ)
print('Number of Atoms Loaded: {}'.format(nc.getNumAtoms()))
print('Number of Confs Loaded: {}'.format(nc.getNumConfs()))

# Evaluate the network energies for the loaded conformers.
print('Computing energies 1...')
Ecmp = nc.computeEnergies()
Ecmp = np.array(Ecmp)
print('Computation complete 1.')
from scipy import stats as st

# Directory holding the network and its parameter files.
wkdir = '/home/jujuman/Research/GDB-11-wB97X-6-31gd/train_07/'

# Inputs required by pyNeuroChem, all located under wkdir.
cnstfile = '{}rHCNO-4.5A_32-3.1A_a8-8.params'.format(wkdir)
saefile = '{}sae_6-31gd.dat'.format(wkdir)
nnfdir = '{}networks/'.format(wkdir)

# Build the NeuroChem interface.
nc = pync.pyNeuroChem(cnstfile, saefile, nnfdir, 0)

# Load the atomoxetine conformer file and keep its energies as an array.
xyz, typ, Eact = gt.readncdat('/home/jujuman/Dropbox/Research/ChemSciencePaper/TestCases/Atomoxetine/Atomoxetine_conformersC_test.dat')
Eact = np.array(Eact)

# Hand the conformers to NeuroChem and report what it loaded.
nc.setConformers(confs=xyz, types=typ)
print('Number of Atoms Loaded: {}'.format(nc.getNumAtoms()))
print('Number of Confs Loaded: {}'.format(nc.getNumConfs()))

# Evaluate the network energies for the loaded conformers.
print('Computing energies 1...')
Ecmp = nc.computeEnergies()
Ecmp = np.array(Ecmp)
print('Computation complete 1.')
# Accumulators initialised before the per-file loop below. How they are
# filled is not visible in this fragment.
Eerr = []
# NOTE(review): this name would shadow the stdlib `time` module if it were
# imported under that name; the timing calls here go through `tm`.
time = 0.0

# Two [int, float] pairs, initialised to a low and a high sentinel value
# (presumably trackers for a largest / smallest quantity; confirm below).
ld = [0,0.0]
sd = [0,100000.0]

err = []
sze = []

N = 0

# Iterate over file names relative to dtdir.
for i in files:
#for i in range(rng[0],rng[1]):
    #xyz,typ,Eact_t,readf    = gt.readncdat(dtdir + fpref + str(i) + fpost)
    xyz,typ,Eact_t,readf    = gt.readncdat(dtdir + i)

    # The fourth value returned by readncdat gates all further work on the file.
    if readf:
        # Set the conformers in NeuroChem
        nc.setConformers(confs=xyz,types=typ)

        #print('FILE: ' + dtdir + fpref + str(i) + fpost)
        print('FILE: ' + dtdir + i)

        # Print some data from the NeuroChem
        print( '1) Number of Atoms Loaded: ' + str(nc.getNumAtoms()) )
        print( '1) Number of Confs Loaded: ' + str(nc.getNumConfs()) )

        # Compute Energies of Conformations
        print('Computing energies...')
        # Start of the timed section; the timed call itself is not part of
        # this fragment.
        _t1b = tm.time()
# Size argument handed to cm.GenCMatData; column N * N of the result is
# overwritten with the energies below.
N = 30

print('Computing coulomb matrices...')
_t1b = tm.time()

cnt=0

# One output file is written per input file, named cmdfile + '_' + <counter>.
for i in files:
    cnt += 1
    print('FILE: ' + str(cnt) + ' of ' + str(len(files)) + ' ' + i)

    # Set file
    file = dtdir + i

    # Get training molecules
    xyz_tr, typ_tr, Eact_tr, readf = gt.readncdat(file, np.float32)

    # Compute energy of atoms at infinite separation
    ise = cm.computerISE(typ_tr)

    Eact_tr = (Eact_tr - ise)

    Nm = Eact_tr.shape[0]
    cmat = cm.GenCMatData(xyz_tr,typ_tr,N)

    # Store the shifted energies in column N * N of the matrix data.
    cmat[:, N * N ] = Eact_tr

    # The context manager closes the file even if tofile raises.
    with open(cmdfile + '_' + str(cnt), 'wb') as output_file:
        cmat.tofile(output_file)
# Apply the font settings (defined earlier) to all matplotlib text.
plt.rc('font', **font)

# Directory holding the network and its parameter files.
wkdir = '/home/jujuman/Research/GDB-11-wB97X-6-31gd/train_08_5/'

# Inputs required by pyNeuroChem, all located under wkdir.
cnstfile = '{}rHCNO-4.7A_32-3.2A_a8-8.params'.format(wkdir)
saefile = '{}sae_6-31gd.dat'.format(wkdir)
nnfdir = '{}networks/'.format(wkdir)

# Build the NeuroChem interface.
nc = pync.pyNeuroChem(cnstfile, saefile, nnfdir, 0)

# Load the DFT and DFTB retinol conformer files and keep their energies as
# numpy arrays.
xyz, typ, Eact = gt.readncdat('/home/jujuman/Dropbox/ChemSciencePaper.AER/TestCases/Retinol/data/retinolconformer_DFT.dat')
xyz1, typ1, Eact1 = gt.readncdat('/home/jujuman/Dropbox/ChemSciencePaper.AER/TestCases/Retinol/data/retinolconformer_DFTB.dat')
Eact, Eact1 = np.array(Eact), np.array(Eact1)

# Only the DFT geometries are handed to NeuroChem.
nc.setConformers(confs=xyz, types=typ)

# Report what NeuroChem loaded.
print('Number of Atoms Loaded: {}'.format(nc.getNumAtoms()))
print('Number of Confs Loaded: {}'.format(nc.getNumConfs()))

# Start of the timed energy evaluation; the evaluation itself follows this
# fragment.
print('Computing energies...')
_t1b = tm.time()
sze = []

# Error / file-name pairs initialised to sentinels (presumably the worst and
# best file seen so far; the updates are not visible in this fragment).
Herror = 0.0
Wfile = ''

Lerror = 100.0
Bfile = ''

# Total number of files and a 1-based progress counter for the log line.
Nf = len(files)
cnt = 0

for i in files:
    cnt += 1
#for i in range(rng[0],rng[1]):
    #xyz,typ,Eact_t,readf    = gt.readncdat(dtdir + fpref + str(i) + fpost)
    xyz,typ,Eact_W,readf    = gt.readncdat(dtdir + i)

    # Force the coordinates into a float32 array of shape
    # (n_conformers, len(typ), 3).
    xyz = np.asarray(xyz,dtype=np.float32)
    xyz = xyz.reshape((xyz.shape[0],len(typ),3))

    # Skip files flagged by readncdat as unread or holding no conformers.
    if readf and xyz.shape[0] > 0:

        #print('FILE: ' + dtdir + fpref + str(i) + fpost)
        print('FILE: ' + str(cnt) + ' of ' + str(Nf) + ' ' + i)

        # Nm: conformers in the file, Na: atoms per conformer.
        Nm = xyz.shape[0]
        Na = xyz.shape[1]

        # Total atom count over all conformers.
        Nat = Na * Nm

        # Number of pieces needed so that each holds at most 65000 atoms
        # (presumably a batch-size limit of the evaluator; confirm).
        Nit = int(np.ceil(Nat/65000.0))
Example #12
0
# Walk every data directory and read, for each file stem, all of its
# suffix variants listed in namelist.
for d in dtdirs:

    tmp = listdir(d)
    # Unique stems: each directory entry with its last '_'-separated piece
    # removed. Built through a set, so the iteration order is not fixed.
    files = list({("_".join(f.split("_")[:-1])) for f in tmp})

    allarr = []
    Natoms = []
    Nconfs = []
    typarr = []
    for i in files:
        print(d+i)
        # Running count of conformers for this stem.
        nc = 0
        for k in namelist:
            try:
                _timeloop = tm.time()
                readarrays = gt.readncdat(d+i+k)
                _timeloop2 = (tm.time() - _timeloop)
                # Accumulates the raw tm.time() difference (no scaling applied).
                totaltime += _timeloop2
                #print('Computation complete. Time: ' + "{:.4f}".format(_timeloop2) + 'ms')

                # Remember the shapes so a missing sibling file can be
                # replaced by empty arrays of matching trailing dimensions.
                shapesarr = [x.shape for x in readarrays]
                typ = readarrays[1]
            except FileNotFoundError:
                # NOTE(review): shapesarr is only assigned in the try body
                # above; if the very first read fails it may be unbound here.
                readarrays = [np.zeros((0,*x[1:])) for x in shapesarr]
            ncsub, nat, ndim = readarrays[0].shape
            nc += ncsub
            # Flatten the first array to (ncsub * nat, ndim).
            # NOTE(review): item assignment requires readarrays to be a
            # mutable sequence; confirm gt.readncdat returns a list.
            readarrays[0] = readarrays[0].reshape(ncsub*nat,ndim)

            allarr.append(readarrays)

        # NOTE(review): nat comes from the last inner iteration; it would be
        # unbound if namelist were empty.
        Natoms.append(nat)
# Files of the first network, located under wkdir1.
cnstfile1, saefile1, nnfdir1 = (
    wkdir1 + 'rHCNO-4.6A_32-3.1A_a8-8.params',
    wkdir1 + 'sae_6-31gd.dat',
    wkdir1 + 'networks/',
)

# Files of the second network, located under wkdir2.
cnstfile2, saefile2, nnfdir2 = (
    wkdir2 + 'rHCNO-4.6A_32-3.1A_a8-8.params',
    wkdir2 + 'sae_6-31gd.dat',
    wkdir2 + 'networks/',
)

# One NeuroChem interface per network.
nc1 = pync.pyNeuroChem(cnstfile1, saefile1, nnfdir1, 0)
nc2 = pync.pyNeuroChem(cnstfile2, saefile2, nnfdir2, 0)

# Load the isomer structures and keep their energies as an array.
xyz, typ, Eact = gt.readncdat('/home/jujuman/Dropbox/Research/ChemSciencePaper/TestCases/C10H20Isomers/isomer_structures_DFT.dat')
Eact = np.array(Eact)

# Both networks receive the same conformers.
nc1.setConformers(confs=xyz, types=typ)
nc2.setConformers(confs=xyz, types=typ)

# Report what the first network loaded.
print('Number of Atoms Loaded: {}'.format(nc1.getNumAtoms()))
print('Number of Confs Loaded: {}'.format(nc1.getNumConfs()))

# Start of the timed energy evaluation; the evaluation itself follows this
# fragment.
print('Computing energies 1...')
_t1b = tm.time()
    xy = sorted(xy, key=lambda x: x[0])
    X, Y = zip(*xy)
    return np.array(Y)

# Directory holding the network and its parameter files.
wkdir1 = '/home/jujuman/Research/GDB-11-wB97X-6-31gd/dataset_size_testing/train_08_1.00/'

# Inputs required by pyNeuroChem, all located under wkdir1.
cnstfile1 = '{}rHCNO-4.5A_32-3.1A_a8-8.params'.format(wkdir1)
saefile1 = '{}sae_6-31gd.dat'.format(wkdir1)
nnfdir1 = '{}networks/'.format(wkdir1)

# Directory of the C10H20 isomer test case.
dtdir = '/home/jujuman/Dropbox/ChemSciencePaper.AER/TestCases/C10H20Isomers/'

# Only the DFT structures are read; the DFTB and PM6 variants
# (isomer_structures_DFTB.dat, isomer_structures_PM6.dat) are not loaded.
xyz, typ, Eact, tmp = gt.readncdat('{}isomer_structures_DFT.dat'.format(dtdir))

# Coordinates as a float32 array of shape (n_structures, len(typ), 3).
xyz = np.asarray(xyz, dtype=np.float32)
xyz = xyz.reshape(xyz.shape[0], len(typ), 3)

Eact = np.array(Eact)

# Construct pyNeuroChem classes
Example #15
0
print('Computing coulomb matrices...')
_t1b = tm.time()

# 1-based progress counter for the log line.
cnt = 0

# A single binary output file receives the data of every input file in turn.
# NOTE(review): no close() of output_file is visible in this fragment;
# confirm it is closed after the loop.
output_file = open(cmdfile + '.dat', 'wb')

for i in files:
    cnt += 1
    print('FILE: ' + str(cnt) + ' of ' + str(len(files)) + ' ' + i)

    # Set file
    file = dtdir + i

    # Get training molecules
    xyz_tr, typ_tr, Eact_tr, readf = gt.readncdat(file, np.float32)

    # Keep only the leading fraction P of the entries (int() truncates).
    xyz_tr = xyz_tr[0:int(P * xyz_tr.shape[0])]
    Eact_tr = Eact_tr[0:int(P * Eact_tr.shape[0])]

    # Compute energy of atoms at infinite separation
    ise = cm.computerISE(typ_tr)

    Eact_tr = (Eact_tr - ise)

    Nm = Eact_tr.shape[0]
    cmat = cm.GenCMatData(xyz_tr, typ_tr, N)

    # Store the shifted energies in column N * N of the matrix data
    # (presumably the column following the flattened N x N matrix; confirm).
    cmat[:, N * N] = Eact_tr

    # Append this file's raw array bytes to the shared output file.
    cmat.tofile(output_file)