    #AND assuming that have written in 1 ps increments...
    #Also, first frame in trajectory is NOT at time zero, so subtract 1
    if endTime == -1:
        thisPot = alcDat[:, 3:-1]
    else:
        thisPot = alcDat[:endTime, 3:-1]
    thisg = timeseries.statisticalInefficiencyMultiple(thisPot)
    print("Statistical inefficiency for this set of potential energies: %f" % thisg)
    #print(startTime)
    #print(startFrame)
    #print(thisPot.shape)

    #Next load in the trajectory and get all solute coordinates that matter
    top.rb_torsions = pmd.TrackedList([])
    top = pt.load_parmed(top, traj=False)
    if endTime == -1:
        traj = pt.iterload(trajFile, top, frame_slice=(startFrame, -1))
    else:
        traj = pt.iterload(trajFile, top, frame_slice=(startFrame, startFrame + endTime))
    nFrames = len(traj)
    xyBox = np.array(traj[0].box.values)[:2] #A little lazy, but all boxes should be same and fixed in X and Y dimensions
    #print(nFrames)
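    #Aside: g is the statistical inefficiency, i.e. the number of correlated frames per effectively
    #independent sample, so only roughly N/g of the stored frames are independent. If subsampling
    #were ever desired, pymbar's timeseries module provides it (illustrative only, not used here):
    #    indices = timeseries.subsampleCorrelatedData(thisPot[:, 0], g=thisg)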
def main(args):

    print(time.ctime(time.time()))

    #Get topology file we're working with
    topFile = args[0]

    #And figure out if we're dealing with solute at surface or in bulk
    if (args[1] == 'True'):
        inBulk = True
    else:
        inBulk = False

    if (args[2] == 'True'):
        doReweight = True
    else:
        doReweight = False

    #Read in topology file now to get information on solute atoms
    top = pmd.load_file(topFile)
    soluteInds = []
    for res in top.residues:
        if res.name not in ['OTM', 'CTM', 'STM', 'NTM', 'SOL']:
            for atom in res.atoms:
                soluteInds.append(atom.idx)

    #Now define how we compute three-body angles with bins and cut-off
    #Shell cut-off
    shellCut = 3.32 #1st minimum distance for TIP4P-Ew water at 298.15 K and 1 bar
    #Number of angle bins
    nAngBins = 100 #500
    #Define bin centers (should be nBins equally spaced between 0 and 180)
    angBinCents = 0.5 * (np.arange(0.0, 180.001, 180.0/nAngBins)[:-1] + np.arange(0.0, 180.001, 180.0/nAngBins)[1:])

    #And distance bins for local oxygen-oxygen RDF calculation
    #(really distance histograms from central oxygens - can normalize however we want, really)
    distBinWidth = 0.05
    nDistBins = int(shellCut / distBinWidth)
    distBins = np.arange(0.0, nDistBins*distBinWidth+0.00001, distBinWidth)
    distBinCents = 0.5 * (distBins[:-1] + distBins[1:])

    #Define the size of the probes used for assessing density fluctuations near solute
    probeRadius = 3.3 #radius in Angstroms; the DIAMETER of a methane, so assumes other atoms methane-sized

    #And bins for numbers of waters in probes
    probeBins = np.arange(0.0, 21.00001, 1.0)
    nProbeBins = len(probeBins) - 1 #Will use np.histogram, which includes left edge in bin (so if want up to 20, go to 21)

    #And will record number of waters in each solvation shell (histograms of)
    shellBins = np.arange(0.0, 251.00001, 1.0) #probably way too many bins, but don't want to run out
    nShellBins = len(shellBins) - 1

    #Should we do 2D histogram for angles and distances?
    #Or also do 2D histogram for number of waters in probe and three-body angle?
    #Interesting if make probe radius same size as three-body angle cutoff?
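    #As a concrete check on the binning convention (descriptive only): histogramming angles with
    #np.histogram(angles, bins=nAngBins, range=[0.0, 180.0]) returns edges whose midpoints,
    #0.5*(edges[:-1] + edges[1:]), reproduce angBinCents defined above; the same pattern holds
    #for distBins and distBinCents.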
    #Finally, also define the bins for computing RDFs of waters near all solute atoms
    #Will use to define 1st and 2nd solvation shells
    rdfBinWidth = 0.2
    rdfMax = 12.00
    rdfBins = np.arange(0.0, rdfMax+0.000001, rdfBinWidth)
    rdfBinCents = 0.5 * (rdfBins[:-1] + rdfBins[1:])
    nRDFBins = len(rdfBinCents)
    rdfBinVols = (4.0*np.pi/3.0)*(rdfBins[1:]**3 - rdfBins[:-1]**3)
    bulkDens = 0.0332 #Roughly right for TIP4P-EW at 298.15 K and 1 bar in inverse Angstroms cubed

    #Need to create a variety of arrays to hold the data we're interested in
    #Will record distributions in both 1st and 2nd solvation shells
    shellCountsCoupled = np.zeros((nShellBins, 2)) #Histograms for numbers of waters in hydration shells of solutes
    probeHistsCoupled = np.zeros((nProbeBins, 2)) #Histograms for numbers of waters in probes in 1st and 2nd hydration shells
    angHistsCoupled = np.zeros((nAngBins, 2)) #Histograms of three-body angles for water oxygens within solvation shells
    distHistsCoupled = np.zeros((nDistBins, 2)) #Histograms of distances to water oxygens from central oxygens
    solRDFsCoupled = np.zeros((nRDFBins, len(soluteInds))) #RDFs between each solute atom and water oxygens
    shellCountsDecoupled = np.zeros((nShellBins, 2)) #Same as above, but for the decoupled state, not the coupled state
    probeHistsDecoupled = np.zeros((nProbeBins, 2))
    angHistsDecoupled = np.zeros((nAngBins, 2))
    distHistsDecoupled = np.zeros((nDistBins, 2))
    solRDFsDecoupled = np.zeros((nRDFBins, len(soluteInds)))

    #First need configuration weights to use in computing average quantities
    #But only do if we're using a simulation with an expanded ensemble
    if doReweight:
        if inBulk:
            weightsCoupled, weightsDecoupled = getConfigWeightsBulk(kB=1.0, T=1.0) #Using 1 for kB and T because alchemical_output.txt should already have potential energies in kBT
            simDirs = ['.']
        else:
            weightsCoupled, weightsDecoupled = getConfigWeightsSurf()
            simDirs = ['Quad_0.25X_0.25Y', 'Quad_0.25X_0.75Y', 'Quad_0.75X_0.25Y', 'Quad_0.75X_0.75Y']
    else:
        weightsCoupled = np.array([])
        weightsDecoupled = np.array([])
        simDirs = ['.']

    #To correctly match weights up to configurations, need to count frames from all trajectories
    countFrames = 0

    #Next, want to loop over all trajectories and compute RDFs from solute atoms to water oxygens
    #Will use this to define solvation shells for finding other properties
    #Actually, having looked at RDFs, just use 5.5 for first shell and 8.5 for second shell...
    #AND use all atoms, including hydrogens, which have LJ interactions in GAFF2, to define shells
    #Actually now only using heavy atoms...
but when look at RDFs, examine all atoms for adir in simDirs: if doReweight: #Before loading trajectory, figure out how many frames to exclude due to weight equilibration alcDat = np.loadtxt(adir+'/alchemical_output.txt') startTime = alcDat[0, 1] startFrame = int(startTime) - 1 else: startFrame = 0 top = pmd.load_file(topFile) top.rb_torsions = pmd.TrackedList([]) #This is just for SAM systems so that it doesn't break pytraj top = pt.load_parmed(top, traj=False) traj = pt.iterload(adir+'/prod.nc', top, frame_slice=(startFrame, -1)) if not doReweight: weightsCoupled = np.hstack((weightsCoupled, np.ones(len(traj)))) weightsDecoupled = np.hstack((weightsDecoupled, np.ones(len(traj)))) print("\nTopology and trajectory loaded from directory %s" % adir) owInds = top.select('@OW') soluteInds = top.select('!(:OTM,CTM,STM,NTM,SOL)') print("\n\tFound %i water oxygens" % len(owInds)) print("\tFound %i solute atoms" % len(soluteInds)) for i, frame in enumerate(traj): if i%1000 == 0: print "On frame %i" % i boxDims = np.array(frame.box.values[:3]) currCoords = np.array(frame.xyz) #Wrap based on soluate atom center of geometry and get coordinates of interest wrapCOM = np.average(currCoords[soluteInds], axis=0) currCoords = wl.reimage(currCoords, wrapCOM, boxDims) - wrapCOM owCoords = currCoords[owInds] solCoords = currCoords[soluteInds] #Loop over solute atoms and find pair-distance histograms with water oxygens for j, acoord in enumerate(solCoords): solRDFsCoupled[:,j] += (weightsCoupled[countFrames+i] * wl.pairdistancehistogram(np.array([acoord]), owCoords, rdfBinWidth, nRDFBins, boxDims)) solRDFsDecoupled[:,j] += (weightsDecoupled[countFrames+i] * wl.pairdistancehistogram(np.array([acoord]), owCoords, rdfBinWidth, nRDFBins, boxDims)) #Note that pairdistancehistogram is right-edge inclusive, NOT left-edge inclusive #In practice, not a big difference countFrames += len(traj) #Finish by normalizing RDFs properly for j in range(len(soluteInds)): solRDFsCoupled[:,j] /= rdfBinVols #bulkDens*rdfBinVols solRDFsDecoupled[:,j] /= rdfBinVols #bulkDens*rdfBinVols if not doReweight: solRDFsCoupled /= float(countFrames) solRDFsDecoupled /= float(countFrames) #And save to file np.savetxt('solute-OW_RDFs_coupled.txt', np.hstack((np.array([rdfBinCents]).T, solRDFsCoupled)), header='RDF bins (A) solute atom-OW RDF for solute atom indices %s'%(str(soluteInds))) np.savetxt('solute-OW_RDFs_decoupled.txt', np.hstack((np.array([rdfBinCents]).T, solRDFsDecoupled)), header='RDF bins (A) solute atom-OW RDF for solute atom indices %s'%(str(soluteInds))) print("\tFound RDFs for water oxygens from solute indices.") solShell1Cut = 5.5 #Angstroms from all solute atoms (including hydrogens) solShell2Cut = 8.5 #And now that we know how many frames, we can assign real weights if not reweighting if not doReweight: weightsCoupled /= float(countFrames) weightsDecoupled /= float(countFrames) #Reset countFrames so get weights right countFrames = 0 #Repeat looping over trajectories to calculate water properties in solute solvation shell for adir in simDirs: if doReweight: #Before loading trajectory, figure out how many frames to exclude due to weight equilibration alcDat = np.loadtxt(adir+'/alchemical_output.txt') startTime = alcDat[0, 1] startFrame = int(startTime) - 1 else: startFrame = 0 top = pmd.load_file(topFile) top.rb_torsions = pmd.TrackedList([]) #This is just for SAM systems so that it doesn't break pytraj top = pt.load_parmed(top, traj=False) traj = pt.iterload(adir+'/prod.nc', top, frame_slice=(startFrame, -1)) 
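        #Reminder on the weighting used in the loops (descriptive comment): each frame's histogram is
        #accumulated with weight weightsCoupled[countFrames+i] (or the decoupled analog), and those
        #weights are normalized to sum to 1 over all frames, so the accumulated histograms are
        #reweighted ensemble averages, <A> = sum_t w_t * A_t, in the coupled or decoupled state.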
print("\nTopology and trajectory loaded from directory %s" % adir) owInds = top.select('@OW') soluteInds = top.select('!(:OTM,CTM,STM,NTM,SOL)&!(@H=)') surfInds = top.select('(:OTM,CTM,STM,NTM)&!(@H=)') #For probe insertions, also include solute and surface heavy atoms print("\n\tFound %i water oxygens" % len(owInds)) print("\tFound %i solute heavy atoms" % len(soluteInds)) print("\tFound %i non-hydrogen surface atoms" % len(surfInds)) if len(surfInds) == 0: surfInds.dtype=int for i, frame in enumerate(traj): #if i%10 == 0: # print "On frame %i" % i boxDims = np.array(frame.box.values[:3]) currCoords = np.array(frame.xyz) #Wrap based on soluate atom center of geometry and get coordinates of interest wrapCOM = np.average(currCoords[soluteInds], axis=0) currCoords = wl.reimage(currCoords, wrapCOM, boxDims) - wrapCOM owCoords = currCoords[owInds] solCoords = currCoords[soluteInds] surfCoords = currCoords[surfInds] #Now get solvent shells around solute shell1BoolMat = wl.nearneighbors(solCoords, owCoords, boxDims, 0.0, solShell1Cut) shell1Bool = np.array(np.sum(shell1BoolMat, axis=0), dtype=bool) shell2BoolMat = wl.nearneighbors(solCoords, owCoords, boxDims, solShell1Cut, solShell2Cut) shell2Bool = np.array(np.sum(shell2BoolMat, axis=0), dtype=bool) #And add weight to histogram for numbers of waters in shells thisCount1 = int(np.sum(shell1Bool)) shellCountsCoupled[thisCount1, 0] += weightsCoupled[countFrames+i] shellCountsDecoupled[thisCount1, 0] += weightsDecoupled[countFrames+i] thisCount2 = int(np.sum(shell2Bool)) shellCountsCoupled[thisCount2, 1] += weightsCoupled[countFrames+i] shellCountsDecoupled[thisCount2, 1] += weightsDecoupled[countFrames+i] #And compute water properties of solvent shells, first 3-body angles thisAngs1, thisNumAngs1 = wp.getCosAngs(owCoords[shell1Bool], owCoords, boxDims, highCut=shellCut) thisAngHist1, thisAngBins1 = np.histogram(thisAngs1, bins=nAngBins, range=[0.0, 180.0], density=False) angHistsCoupled[:,0] += weightsCoupled[countFrames+i] * thisAngHist1 angHistsDecoupled[:,0] += weightsDecoupled[countFrames+i] * thisAngHist1 thisAngs2, thisNumAngs2 = wp.getCosAngs(owCoords[shell2Bool], owCoords, boxDims, highCut=shellCut) thisAngHist2, thisAngBins2 = np.histogram(thisAngs2, bins=nAngBins, range=[0.0, 180.0], density=False) angHistsCoupled[:,1] += weightsCoupled[countFrames+i] * thisAngHist2 angHistsDecoupled[:,1] += weightsDecoupled[countFrames+i] * thisAngHist2 #And ow-ow pair distance histograms in both shells as well thisDistHist1 = wl.pairdistancehistogram(owCoords[shell1Bool], owCoords, distBinWidth, nDistBins, boxDims) distHistsCoupled[:,0] += weightsCoupled[countFrames+i] * thisDistHist1 distHistsDecoupled[:,0] += weightsDecoupled[countFrames+i] * thisDistHist1 thisDistHist2 = wl.pairdistancehistogram(owCoords[shell2Bool], owCoords, distBinWidth, nDistBins, boxDims) distHistsCoupled[:,1] += weightsCoupled[countFrames+i] * thisDistHist2 distHistsDecoupled[:,1] += weightsDecoupled[countFrames+i] * thisDistHist2 #Next compute distributions of numbers of waters in probes centered within each shell #To do this, create random grid of points in SQUARE that encompasses both shells #Then only keep points within each shell based on distance #Square will be based on shell cutoffs and min and max coordinates in each dimension of solute minSolX = np.min(solCoords[:,0]) - solShell2Cut maxSolX = np.max(solCoords[:,0]) + solShell2Cut minSolY = np.min(solCoords[:,1]) - solShell2Cut maxSolY = np.max(solCoords[:,1]) + solShell2Cut minSolZ = np.min(solCoords[:,2]) - 
solShell2Cut maxSolZ = np.max(solCoords[:,2]) + solShell2Cut thisGridX = minSolX + np.random.random(500)*(maxSolX - minSolX) thisGridY = minSolY + np.random.random(500)*(maxSolY - minSolY) thisGridZ = minSolZ + np.random.random(500)*(maxSolZ - minSolZ) thisGrid = np.vstack((thisGridX, thisGridY, thisGridZ)).T gridBoolMat1 = wl.nearneighbors(solCoords, thisGrid, boxDims, 0.0, solShell1Cut) gridBool1 = np.array(np.sum(gridBoolMat1, axis=0), dtype=bool) thisNum1 = wl.probegrid(np.vstack((owCoords, surfCoords, solCoords)), thisGrid[gridBool1], probeRadius, boxDims) thisProbeHist1, thisProbeBins1 = np.histogram(thisNum1, bins=probeBins, density=False) probeHistsCoupled[:,0] += weightsCoupled[countFrames+i] * thisProbeHist1 probeHistsDecoupled[:,0] += weightsDecoupled[countFrames+i] * thisProbeHist1 gridBoolMat2 = wl.nearneighbors(solCoords, thisGrid, boxDims, solShell1Cut, solShell2Cut) gridBool2 = np.array(np.sum(gridBoolMat2, axis=0), dtype=bool) thisNum2 = wl.probegrid(np.vstack((owCoords, surfCoords, solCoords)), thisGrid[gridBool2], probeRadius, boxDims) thisProbeHist2, thisProbeBins2 = np.histogram(thisNum2, bins=probeBins, density=False) probeHistsCoupled[:,1] += weightsCoupled[countFrames+i] * thisProbeHist2 probeHistsDecoupled[:,1] += weightsDecoupled[countFrames+i] * thisProbeHist2 countFrames += len(traj) #Should have everything we need, so save to text files np.savetxt('solute_shell_hists.txt', np.hstack((np.array([shellBins[:-1]]).T, shellCountsCoupled, shellCountsDecoupled)), header='Histograms of numbers of waters in first and second solute solvation shells with solvent in coupled (columns 2, 3) and decoupled (columns 4, 5) states') np.savetxt('solute_probe_hists.txt', np.hstack((np.array([probeBins[:-1]]).T, probeHistsCoupled, probeHistsDecoupled)), header='Number waters in probe histograms in first and second solute solvation shells with solvent in coupled (columns 2, 3) and decoupled (columns 4, 5) states') np.savetxt('solute_ang_hists.txt', np.hstack((np.array([angBinCents]).T, angHistsCoupled, angHistsDecoupled)), header='3-body angle histograms in first and second solute solvation shells with solvent in coupled (columns 2, 3) and decoupled (columns 4, 5) states') np.savetxt('solute_pair_hists.txt', np.hstack((np.array([distBinCents]).T, distHistsCoupled, distHistsDecoupled)), header='O-O pair-distance histograms in first and second solute solvation shells with solvent in coupled (columns 2, 3) and decoupled (columns 4, 5) states') print time.ctime(time.time())
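#A minimal sketch of how the histogram files written above might be normalized when read back in.
#This helper is not called anywhere in these scripts and only assumes the np.savetxt layout used
#above (first column is bin centers or left edges, remaining columns are histograms).
def loadNormalizedHists(fname='solute_ang_hists.txt'):
    """Load a histogram text file written above and normalize each column to unit area."""
    dat = np.loadtxt(fname)
    bins = dat[:, 0]
    hists = dat[:, 1:]
    binWidth = bins[1] - bins[0]
    norms = np.sum(hists, axis=0) * binWidth
    return bins, hists / norms
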
def getConfigWeightsSurf(kB=0.008314459848, T=298.15): """Computes and returns the configuration weights for simulations with a solute at an interface. Mostly replicates calcdGsolv in genetic_lib, but returns config weights in both the fully coupled and decoupled states (also includes pV term - won't matter very much for free energy differences, but maybe matters for weighting configurations, even though also probably not too much). """ #First define directory structure, spring constants, etc. simDirs = ['Quad_0.25X_0.25Y', 'Quad_0.25X_0.75Y', 'Quad_0.75X_0.25Y', 'Quad_0.75X_0.75Y'] kXY = [10.0, 10.0, 10.0, 10.0] #spring constant in kJ/mol*A^2 refX = [7.4550, 7.4550, 22.3650, 22.3650] refY = [8.6083, 25.8249, 8.6083, 25.8249] distRefX = [7.4550, 7.4550, 7.4550, 7.4550] distRefY = [8.6083, 8.6083, 8.6083, 8.6083] numStates = 19 #And some constants kBT = kB*T beta = 1.0 / kBT #First make sure all the input arrays have the same dimensions numSims = len(simDirs) allLens = np.array([len(a) for a in [kXY, refX, refY, distRefX, distRefY]]) #Want to loop over all trajectories provided, storing solute position information to calculate restraints xyPos = None #X and Y coordinates of first heavy atom for all solutes - get shape later nSamps = np.zeros((len(simDirs), numStates), dtype=int) #Have as many x-y restraints as sims and same number of lambda states for each allPots = np.array([[]]*numStates).T #Potential energies, EXCLUDING RESTRAINT, for each simulation frame and lambda state #Will also include pV term because may matter for configurations xyBox = np.zeros(2) for i, adir in enumerate(simDirs): topFile = "%s/../sol_surf.top"%adir trajFile = "%s/prod.nc"%adir alchemicalFile = "%s/alchemical_output.txt"%adir #First load in topology and get atom indices top = pmd.load_file(topFile) #Get solute heavy atoms for each solute #Also get indices of surface atoms to use as references later #Only taking last united atoms of first SAM molecule we find heavyIndices = [] for res in top.residues: if res.name not in ['OTM', 'CTM', 'STM', 'NTM', 'SOL']: #Assumes working with SAM surface... thisheavyinds = [] for atom in res.atoms: if not atom.name[0] == 'H': thisheavyinds.append(atom.idx) heavyIndices.append(thisheavyinds) #Make into arrays for easier referencing heavyIndices = np.array(heavyIndices) #Load in the potential energies, INCLUDING RESTRAINT, at all states for this simulation to figure out frames to skip alcDat = np.loadtxt(alchemicalFile) startTime = alcDat[0, 1] startFrame = int(startTime) - 1 #Be careful here... need write frequency in alchemical file to match exactly with positions #AND assuming that have written in 1 ps increments... 
#Also, first frame in trajectory is NOT at time zero, so subtract 1 thisPot = alcDat[:, 3:-1] thispV = alcDat[:, -1] #Next load in the trajectory and get all solute coordinates that matter top.rb_torsions = pmd.TrackedList([]) top = pt.load_parmed(top, traj=False) traj = pt.iterload(trajFile, top, frame_slice=(startFrame, -1)) nFrames = len(traj) xyBox = np.array(traj[0].box.values)[:2] #A little lazy, but all boxes should be same and fixed in X and Y dimensions thisxyPos = np.zeros((nFrames, len(heavyIndices), 2)) thisnSamps = np.zeros(numStates, dtype=int) #Reference x and y coordinates for this restraint thisRefXY = np.array([refX[i], refY[i]]) for j, frame in enumerate(traj): thisPos = np.array(frame.xyz) thisXY = thisPos[heavyIndices[:,0]][:, :2] #Takes XY coords for first heavy atom from each solute thisxyPos[j,:] = thisXY thisnSamps[int(alcDat[j, 2])] += 1 #Lambda states must be indexed starting at 0 #Also get wrapped positions relative to each reference face #AND calculate xy restraint energy to remove by adding this for each solute xyEnergy = 0.0 for k in range(len(heavyIndices)): xy = thisXY[k] #Then separately reimage around the restraint reference positions to calculate energy xy = wl.reimage([xy], thisRefXY, xyBox)[0] - thisRefXY xyEnergy += ( 0.5*kXY[i]*(0.5*(np.sign(xy[0] - distRefX[i]) + 1))*((xy[0] - distRefX[i])**2) + 0.5*kXY[i]*(0.5*(np.sign(xy[1] - distRefY[i]) + 1))*((xy[1] - distRefY[i])**2) ) #Remove the restraint energy (only for x-y restraint... z is the same in all simulations) thisPot[j,:] -= (xyEnergy / kBT) #And also add in pV contribution thisPot[j,:] += thispV[j] #Add to other things we're keeping track of if xyPos is None: xyPos = copy.deepcopy(thisxyPos) else: xyPos = np.vstack((xyPos, thisxyPos)) nSamps[i,:] = thisnSamps allPots = np.vstack((allPots, thisPot)) #Now should have all the information we need #Next, put it into the format that MBAR wants, adding energies as needed Ukn = np.zeros((len(simDirs)*numStates, int(np.sum(nSamps)))) for i in range(len(simDirs)): #First get energy of ith type of x-y restraint for all x-y positions thisRefXY = np.array([refX[i], refY[i]]) #Must do by looping over each solute xyEnergy = np.zeros(xyPos.shape[0]) for k in range(len(heavyIndices)): xy = wl.reimage(xyPos[:,k,:], thisRefXY, xyBox) - thisRefXY xyEnergy += ( 0.5*kXY[i]*(0.5*(np.sign(xy[:,0] - distRefX[i]) + 1))*((xy[:,0] - distRefX[i])**2) + 0.5*kXY[i]*(0.5*(np.sign(xy[:,1] - distRefY[i]) + 1))*((xy[:,1] - distRefY[i])**2) ) #Loop over alchemical states with this restraint and add energy for j in range(numStates): Ukn[i*numStates+j, :] = allPots[:,j] + (xyEnergy / kBT) #Now should be set to run MBAR mbarObj = mbar.MBAR(Ukn, nSamps.flatten()) #Following computePMF in MBAR to get configuration weights with desired potential of interest logwCoupled = mbarObj._computeUnnormalizedLogWeights(allPots[:,0]) logwDecoupled = mbarObj._computeUnnormalizedLogWeights(allPots[:,-1]) #Also report average solute-system LJ and coulombic potential energies in the fully coupled ensemble #(with restraints removed) #Just printing these values avgQ, stdQ = mbarObj.computeExpectations(allPots[:,0] - allPots[:,4], allPots[:,0]) avgLJ, stdLJ = mbarObj.computeExpectations(allPots[:,4] - allPots[:,-1], allPots[:,0]) print("\nAverage solute-system electrostatic potential energy: %f +/- %f"%(avgQ, stdQ)) print("Average solute-system LJ potential energy: %f +/- %f\n"%(avgLJ, stdLJ)) #Also print information that can be used to break free energy into components #Start by just printing all of 
    #the free energies between states
    alldGs, alldGerr = mbarObj.computePerturbedFreeEnergies(allPots.T)
    print("\nAll free energies relative to first (coupled) state:")
    print(alldGs.tolist())
    print(alldGerr.tolist())

    #And the free energy changes associated with just turning on LJ and electrostatics separately
    dGq = alldGs[0][0] - alldGs[0][4]
    dGqErr = np.sqrt((alldGerr[0][0]**2) + (alldGerr[0][4])**2)
    print("\nElectrostatic dG (with LJ on): %f +/- %f"%(dGq, dGqErr))
    dGlj = alldGs[4][4] - alldGs[4][-1]
    dGljErr = np.sqrt((alldGerr[4][4]**2) + (alldGerr[4][-1])**2)
    print("\nLJ dG (no charges): %f +/- %f"%(dGlj, dGljErr))

    #Now calculate average potential energy differences needed for computing relative entropies
    dUq, dUqErr = mbarObj.computeExpectations(allPots[:,0] - allPots[:,4], allPots[:,0])
    print("\nAverage electrostatic potential energy in fully coupled state: %f +/- %f"%(dUq, dUqErr))
    dUlj, dUljErr = mbarObj.computeExpectations(allPots[:,4] - allPots[:,-1], allPots[:,4])
    print("\nAverage LJ potential energy (no charges) in uncharged state: %f +/- %f"%(dUlj, dUljErr))

    #And return weights after exponentiating log weights and normalizing
    wCoupled = np.exp(logwCoupled)
    wCoupled /= np.sum(wCoupled)
    wDecoupled = np.exp(logwDecoupled)
    wDecoupled /= np.sum(wDecoupled)

    return wCoupled, wDecoupled
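#Small numerical-stability sketch: the getConfigWeights functions above exponentiate MBAR's
#unnormalized log weights directly. If overflow were ever a concern, the standard trick is to
#shift by the maximum log weight first (illustrative helper, not used by the code above).
def normalizeLogWeights(logw):
    """Convert unnormalized log weights into normalized configuration weights."""
    logw = np.asarray(logw)
    w = np.exp(logw - np.max(logw)) #subtracting the max leaves the normalized weights unchanged
    return w / np.sum(w)
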
def main(args): print time.ctime(time.time()) #To define some things, read in the first simulation #Will then assume all other simulations are set up in an identical way #(i.e. same box size, same number of frames) topFile = args[0] simDirs = args[1:] #First load in the topology and trajectory top = pmd.load_file(topFile) top.rb_torsions = pmd.TrackedList( []) #This is just for SAM systems so that it doesn't break pytraj top = pt.load_parmed(top, traj=False) traj = pt.iterload(simDirs[0] + '/prod.nc', top) boxDims = np.array(traj[0].box.values[:3]) #Before starting, create bins in the x, y, and z-directions #This will be used to define instantaneous interfaces and record some spatially varying interfacial properties gridSize = 1.0 #Angstroms xGrid = np.arange(-boxDims[0] / 2.0, boxDims[0] / 2.0 + gridSize, gridSize) #Bins may overlap, but that's ok yGrid = np.arange(-boxDims[1] / 2.0, boxDims[1] / 2.0 + gridSize, gridSize) zGrid = np.arange((-boxDims[2] + boxDims[2] % gridSize) / 2.0, (boxDims[2] - boxDims[2] % gridSize) / 2.0 + 0.001, gridSize) #Symmetrizing grid in z direction print "Using following coarse grids in x-, y-, and z-dimensions:" print xGrid print yGrid print zGrid #Define grid-point centers xGridLocs = 0.5 * (xGrid[:-1] + xGrid[1:]) yGridLocs = 0.5 * (yGrid[:-1] + yGrid[1:]) zGridLocs = 0.5 * (zGrid[:-1] + zGrid[1:]) #And define grid sizes xSize = len(xGridLocs) ySize = len(yGridLocs) zSize = len(zGridLocs) #Now set-up fine grid for actually identifying the interface more precisely zGridSize = 0.1 #Set separate grid size in z direction for a fine grid used for finding all density profiles zGridFine = np.arange((-boxDims[2] + boxDims[2] % zGridSize) / 2.0, (boxDims[2] - boxDims[2] % zGridSize) / 2.0 + 0.001, zGridSize) #Define grid-point centers for fine grid zGridLocsFine = 0.5 * (zGridFine[:-1] + zGridFine[1:]) zSizeFine = len(zGridLocsFine) #For calculating some properties as a function as distance from the surface, will use specific z slices #While all the grid stuff is relative to surface SU atoms, want z slices relative to the interface itself sliceGridSize = 0.5 zSlice = np.arange(-6.0, 12.000001, sliceGridSize) zSliceLocs = 0.5 * (zSlice[:-1] + zSlice[1:]) zSliceSize = len(zSliceLocs) #Set the default bulk density bulkDens = 0.0332 #Roughly right for TIP4P-EW at 298.15 K and 1 bar in inverse Angstroms cubed #Define the density fraction for determining the interface location fracDens = 0.3 #this is lower than 0.5 which is usually used by Willard and others #however, it puts the interface a little closer to the surface atoms and, #as Willard says in his 2014 paper, any value between 0.3 and 0.7 works densCut = fracDens * bulkDens print "\nUsing bulk density value of %f (TIP4P-Ew at 1 bar and 298.15 K)." % bulkDens print "To define interface, using a bulk density fraction of %f." 
% fracDens #Define the size of the probes used for assessing hydrophobicity probeRadius = 3.3 # radius in Angstroms; the DIAMETER of a methane (so assumes all other atoms methane-size) #Now define how we compute three-body angles with bins and cut-off #Shell cut-off shellCut = 3.32 #1st minimum distance for TIP4P-Ew water at 298.15 K and 1 bar #Number of angle bins nAngBins = 100 #500 #Define bin centers (should be nBins equally spaced between 0 and 180) angBinCents = 0.5 * (np.arange(0.0, 180.001, 180.0 / nAngBins)[:-1] + np.arange(0.0, 180.001, 180.0 / nAngBins)[1:]) #And distance bins for local RDF calculation #(really distance histograms from central oxygens - can normalize however we want, really) distBinWidth = 0.05 nDistBins = int(shellCut / distBinWidth) distBins = np.arange(0.0, nDistBins * distBinWidth + 0.00001, distBinWidth) distBinCents = 0.5 * (distBins[:-1] + distBins[1:]) #And bins for numbers of waters in probes probeBins = np.arange(0.0, 21.00001, 1.0) nProbeBins = len( probeBins ) - 1 #Will use np.histogram, which includes left edge in bin (so if want up to 20, go to 21) #Should we do 2D histogram for angles and distances? #Or also do 2D histogram for number of waters in probe and three-body angle? #Interesting if make probe radius same size as three-body angle cutoff? #Need to create a variety of arrays to hold the data we're interested in interfaceZmean = np.zeros( (xSize, ySize) ) #Average mean interface height at each x-y bin - same in all x-y bins interfaceZ = np.zeros( (xSize, ySize)) #Average instantaneous interface height at each x-y bin interfaceZSq = np.zeros( (xSize, ySize )) #Squared average interface height (to compute height fluctuations) #This can be more easily compared to mean-field studies watDensFine = np.zeros( zSizeFine ) #Water density on finer grid for instantaneous interface definition watDensFineMean = np.zeros( zSizeFine) #Same but for mean interface definition surfDensFine = np.zeros( zSizeFine ) #Density profile for surface heavy atoms (ignores interface definition) probeHists = np.zeros( (nProbeBins, zSliceSize )) #Histograms for numbers of waters in probes at each z-slice angHists = np.zeros( (nAngBins, zSliceSize) ) #Histograms of three-body angles for water oxygens within in each z-slice distHists = np.zeros( (nDistBins, zSliceSize )) #Histograms of distances to water oxygens from central oxygens probeHistsMean = np.zeros( (nProbeBins, zSliceSize)) #And same definitions but for mean interfaces angHistsMean = np.zeros((nAngBins, zSliceSize)) distHistsMean = np.zeros((nDistBins, zSliceSize)) #Since will use multiple trajectories, also need to count total frames we're averaging over totFrames = 0.0 #Now need to define variables to hold the surface points and normal vectors at each point #Note that by surface points, here we just mean the INDICES of the x, y, and z grids #To access the actual points, need to reference xGridLocs, etc. 
#In the 'instant' interface definition, the exact locations of surface points changes, but #only keep one surface point per x-y bin, so when project onto x-y plane to look at heat #plots can use the same grid point locations as for the fixed, 'mean' interface #With the 'mean' definition, this will be set below and unchanged when computing metrics surfacePoints = np.zeros((xSize * ySize, 3), dtype=int) surfaceNorms = np.zeros( (xSize * ySize, 3)) #Should keep as normalized vectors throughout surfacePointsMean = np.zeros((xSize * ySize, 3), dtype=int) surfaceNormsMean = np.zeros((xSize * ySize, 3)) #But also want an actual list of all possible grid-point locations... xmesh, ymesh, zmesh = np.meshgrid(np.arange(xSize), np.arange(ySize), np.arange(zSize)) gridPoints = np.vstack( (xmesh.flatten(), ymesh.flatten(), zmesh.flatten())).T #At this point, want to actually loop over the simulation directories, load simulations, #and do calculations for each frame for adir in simDirs: top = pmd.load_file(topFile) top.rb_torsions = pmd.TrackedList( []) #This is just for SAM systems so that it doesn't break pytraj top = pt.load_parmed(top, traj=False) traj = pt.iterload(adir + '/prod.nc', top) print("\nTopology and trajectory loaded from directory %s" % adir) owInds = top.select('@OW') surfInds = top.select('(:OTM,CTM,STM,NTM)&!(@H=)') suInds = top.select('@SU') print("\n\tFound %i water oxygens" % len(owInds)) print("\tFound %i non-hydrogen surface atoms" % len(surfInds)) print("\tFound %i SU surface atoms" % len(suInds)) nFrames = float( traj.n_frames) #Make it a float to make averaging easier later totFrames += nFrames tempWatDensFineMean = np.zeros(zSizeFine) #Find the mean interface definition to use for this trajectory #Need to loop over the trajectory once and find mean interface location for i, frame in enumerate(traj): boxDims = np.array(frame.box.values[:3]) currCoords = np.array(frame.xyz) #Need to do wrapping procedure by first putting surface together, then wrapping water around it wrapCOM1 = currCoords[suInds[0]] currCoords = wl.reimage(currCoords, wrapCOM1, boxDims) - wrapCOM1 wrapCOM2 = np.average(currCoords[suInds], axis=0) currCoords = wl.reimage(currCoords, wrapCOM2, boxDims) - wrapCOM2 #Bin the current z coordinates thisSurfDens, tempBins = np.histogram(currCoords[surfInds, 2], bins=zGridFine, normed=False) thisWatDens, tempBins = np.histogram(currCoords[owInds, 2], bins=zGridFine, normed=False) surfDensFine += thisSurfDens tempWatDensFineMean += thisWatDens #Record fine density, then normalize to number densities for next calculation watDensFineMean += tempWatDensFineMean tempWatDensFineMean = tempWatDensFineMean / (nFrames * boxDims[0] * boxDims[1] * zGridSize) #Find the average density in a region far from the surface to use as a reference #Recall that the wrapping procedure puts the surface center of geometry at the origin refOWdens = np.average( 0.5 * (tempWatDensFineMean[-int(1.0 / zGridSize) - 1:-1] + tempWatDensFineMean[1:int(1.0 / zGridSize) + 1])) #Above uses 1.0 A slices at edges of wrapped box #It's a useful quantity to report, but better to just use fixed cut-off for all simulations... 
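        #For reference (descriptive comment): following Willard and Chandler (2010), wl.willarddensityfield
        #used below is taken to build a coarse-grained density field by summing a smooth, Gaussian-like
        #kernel of width 2.4 Angstroms over all water oxygens; the instantaneous interface is then the
        #isosurface where this field equals densCut = fracDens*bulkDens.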
        print("\n\tDensity value in center of water phase (far from interface): %f" % refOWdens)

        #And find where the water density is half of its "bulk" value
        #Exclude grid points near box edge because fluctuations in box lead to weird densities there
        loInd = np.argmin(abs(tempWatDensFineMean[5:(zSizeFine // 2)] - densCut))
        print("\tOn lower surface, mean interface is at following Z-coordinate:")
        print("\t Lower: %f" % zGridLocsFine[loInd])

        #Now set up arrays of surface points and surface normal vectors - easy for mean interface
        thisxmesh, thisymesh = np.meshgrid(np.arange(xSize, dtype=int), np.arange(ySize, dtype=int))
        surfacePointsMean[:, 0:2] = np.vstack((thisxmesh.flatten(), thisymesh.flatten())).T
        surfacePointsMean[:, 2] = loInd
        surfaceNormsMean[:, 2] = -1.0 #Just working with LOWER surface (solute is on top)

        #Just go ahead and record the average interface height at each x and y bin, since it won't change
        interfaceZmean[:, :] += zGridLocsFine[loInd]

        #Now should be ready to loop over the trajectory using both interface definitions and computing things
        print("\nPre-processing finished, starting main loop over trajectory.")

        for i, frame in enumerate(traj):

            #if i%1000 == 0:
            #    print("On frame %i" % i)

            boxDims = np.array(frame.box.values[:3])
            currCoords = np.array(frame.xyz)

            #Need to do a wrapping procedure to more easily find waters within certain layers of surface
            wrapCOM1 = currCoords[suInds[0]]
            currCoords = wl.reimage(currCoords, wrapCOM1, boxDims) - wrapCOM1
            wrapCOM2 = np.average(currCoords[suInds], axis=0)
            currCoords = wl.reimage(currCoords, wrapCOM2, boxDims) - wrapCOM2

            OWCoords = currCoords[owInds]
            suCoords = currCoords[suInds]
            surfCoords = currCoords[surfInds]

            surfMidZ = np.average(suCoords[:, 2])

            #Get actual locations of surface points (not just indices)
            #Will overwrite with off-lattice x, y, and z positions for instantaneous interface
            #Just use fine z-grid for mean definition
            thisSurf = np.vstack((xGridLocs[surfacePoints[:, 0]],
                                  yGridLocs[surfacePoints[:, 1]],
                                  zGridLocsFine[surfacePoints[:, 2]])).T
            thisSurfMean = np.vstack((xGridLocs[surfacePointsMean[:, 0]],
                                      yGridLocs[surfacePointsMean[:, 1]],
                                      zGridLocsFine[surfacePointsMean[:, 2]])).T

            #Use water coordinates to find instantaneous interface
            #First need to find the density field
            #Note that we use 2.4 as the smoothing length, as in Willard and Chandler, 2010
            thisdensfield, thisdensnorms = wl.willarddensityfield(OWCoords, xGridLocs, yGridLocs, zGridLocs, boxDims, 2.4)

            #Next need to define the interface - using the marching cubes algorithm in scikit-image
            verts, faces, normals, values = skmeasure.marching_cubes_lewiner(thisdensfield, densCut, spacing=(gridSize, gridSize, gridSize))

            #Shift the points returned...
            verts[:, 0] += xGrid[0]
            verts[:, 1] += yGrid[0]
            verts[:, 2] += zGrid[0]

            #And make sure the points are in the box
            #Actually, for the purposes of making sure we have a single interface point for each x-y grid cell,
            #we DO NOT want to wrap. This is because the grid may extend past the box size. For accurately
            #calculating distances this is an issue because points outside the box will never overlap with
            #wrapped atoms. But, we can't wrap now... have to wait until after we have our interface points.
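            #Note on the vertex shift above: skimage's marching_cubes_lewiner returns vertices in
            #physical units set by `spacing` but with the grid origin at zero, so adding xGrid[0],
            #yGrid[0], and zGrid[0] moves them into the same surface-centered frame as the atom
            #coordinates.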
#Below plot is for debugging only #fig = plt.figure(figsize=(10, 10)) #ax = fig.add_subplot(111, projection='3d') #ax.scatter(verts[:,0], verts[:,1], verts[:,2]) #ax.set_xlabel('X') #ax.set_ylabel('Y') #ax.set_zlabel('Z') #fig.tight_layout() #plt.show() #Now need to trim the points so that we only have one point below the surface for #each x-y bin. To do this, I'm taking the min z of the upper surface and max z of the lower surface. #With this definition, hopefully odd blips in the bulk that satisfy the isosurface definition will be #excluded. newvertmat = np.ones( (xSize, ySize, 3)) #Will flatten, but for now use data structure to help newvertmat[:, :, 2] = -10000.0 #Loop over old vertices for avert in verts: thisXind = np.digitize([avert[0]], xGrid)[0] - 1 thisYind = np.digitize([avert[1]], yGrid)[0] - 1 #Check the lower interface if (avert[2] < surfMidZ and avert[2] > newvertmat[thisXind, thisYind, 2] and avert[2] > zGrid[0]): newvertmat[thisXind, thisYind, :] = avert #Need to make sure that all x-y bins had a vertex in them... #If not, use the z-value of one of the 4 adjacent points that isn't also too large unfilledbins = np.where(abs(newvertmat[:, :, :]) == 10000.0) for l in range(len(unfilledbins[0])): ind1 = unfilledbins[0][l] #The x bin ind2 = unfilledbins[1][l] #The y bin newvertmat[ind1, ind2, 0] = xGridLocs[ind1] newvertmat[ind1, ind2, 1] = yGridLocs[ind2] #Use modulo operator to do wrapping if abs(newvertmat[(ind1 - 1) % xSize, ind2, 2]) < 1000.0: newvertmat[ind1, ind2, 2] = newvertmat[(ind1 - 1) % xSize, ind2, 2] elif abs(newvertmat[(ind1 + 1) % xSize, ind2, 2]) < 1000.0: newvertmat[ind1, ind2, 2] = newvertmat[(ind1 + 1) % xSize, ind2, 2] elif abs(newvertmat[ind1, (ind2 - 1) % ySize, 2]) < 1000.0: newvertmat[ind1, ind2, 2] = newvertmat[ind1, (ind2 - 1) % ySize, 2] elif abs(newvertmat[ind1, (ind2 + 1) % ySize, 2]) < 1000.0: newvertmat[ind1, ind2, 2] = newvertmat[ind1, (ind2 + 1) % ySize, 2] #While the points are in a convenient format, record the interface height at each x-y bin interfaceZ += newvertmat[:, :, 2] interfaceZSq += newvertmat[:, :, 2]**2 #If the above procedure didn't fix the issue, just quit and recommend a finer z-grid size unfilledbins = np.where(abs(newvertmat[:, :, 2]) == 10000.0) if len(unfilledbins[0]) > 0: print "Error: after trimming surface points, unable to find surface point for each x-y bin." print "Could fix this by not using all bins and keeping track of bin counts, but maybe later." print "Try and use a finer bin size in the z-dimension." 
sys.exit(2) #Now put together list of surface points (indices on pre-set grid) and exact locations (and unit normals) newverts = np.reshape(newvertmat[:, :, :], (xSize * ySize, 3)) #fig = plt.figure(figsize=(10, 10)) #ax = fig.add_subplot(111, projection='3d') #ax.scatter(newverts[:,0], newverts[:,1], newverts[:,2], c='gray') #ax.set_xlabel('X') #ax.set_ylabel('Y') #ax.set_zlabel('Z') #fig.tight_layout() #plt.show() surfacePoints[:, 0] = np.digitize(newverts[:, 0], xGrid) - 1 surfacePoints[:, 1] = np.digitize(newverts[:, 1], yGrid) - 1 surfacePoints[:, 2] = np.digitize(newverts[:, 2], zGrid) - 1 thisSurf = copy.deepcopy(newverts) unusedDensVals, surfaceNorms = wl.willarddensitypoints( OWCoords, thisSurf, boxDims, 2.4) #print surfacePoints #print thisSurf #print surfaceNorms #print np.linalg.norm(surfaceNorms, axis=1) #At this point, MUST wrap our interface points into box so that distances from waters to them are accurate #We couldn't do this earlier because we need to assign a single interface point to each x-y grid cell thisSurf = wl.reimage(thisSurf, np.zeros(3), boxDims) thisSurfMean = wl.reimage(thisSurfMean, np.zeros(3), boxDims) #fig = plt.figure(figsize=(10, 10)) #ax = fig.add_subplot(111, projection='3d') #ax.scatter(thisSurf[:,0], thisSurf[:,1], thisSurf[:,2], c='orange') #ax.set_xlabel('X') #ax.set_ylabel('Y') #ax.set_zlabel('Z') #fig.tight_layout() #plt.show() #Want to find the density profile #To do this, first need to find which surface point closest to each water #Then project that water's distance from the point along the surface normal #This gives distances #Note that surfaceNorms should be normalized to length 1! #Also, don't worry about the random cutoff of 3.0 that I supplied... this is just part of the routine #Really only want thisWatDists from this function - other stuff is not as useful for this code thisWatClose, thisSurfClose, thisSliceNum, thisWatDists = wl.interfacewater( OWCoords, thisSurf, surfaceNorms, 3.0, boxDims) #Now add to the instantaneous fine water density profile - mean is already done thisWatHist, tempBins = np.histogram(thisWatDists, bins=zGridFine, normed=False) watDensFine += thisWatHist #Now want to look at properties within z-slices moving normal to both interface definitions #For three-body angles and pair distances, digitize waters for each interface definition thisSliceInds = np.digitize(thisWatDists, zSlice) - 1 thisSliceIndsMean = np.digitize( -1.0 * (OWCoords[:, 2] - zGridLocsFine[surfacePointsMean[0, 2]]), zSlice) - 1 #For probe insertions, placing probes at random x, y, and z locations within slices #Make sure to wrap points after randomization to make sure not outside the simulation box #Note different grid spacing for z slices, so need to do z separately randomGrid = np.zeros(thisSurf.shape) randomGrid[:, :2] = thisSurf[:, :2] + (np.random.random_sample( (len(thisSurf), 2)) - 0.5) * 2.0 * gridSize randomGrid[:, 2] = thisSurf[:, 2] + (np.random.random_sample( len(thisSurf)) - 0.5) * 2.0 * sliceGridSize randomGrid = wl.reimage(randomGrid, np.zeros(3), boxDims) randomGridMean = np.zeros(thisSurfMean.shape) randomGridMean[:, :2] = thisSurfMean[:, :2] + ( np.random.random_sample( (len(thisSurfMean), 2)) - 0.5) * 2.0 * gridSize randomGridMean[:, 2] = thisSurfMean[:, 2] + (np.random.random_sample( len(thisSurfMean)) - 0.5) * 2.0 * sliceGridSize randomGridMean = wl.reimage(randomGridMean, np.zeros(3), boxDims) #And loop over z indices, selecting waters and calculating what we want in those slices for j in range(zSliceSize): thisSliceCoords = 
OWCoords[np.where(thisSliceInds == j)[0]] thisSliceCoordsMean = OWCoords[np.where( thisSliceIndsMean == j)[0]] #Make sure we have waters in slices before attempting anything if len(thisSliceCoords) > 0: #Three-body angles thisAngs, thisNumAngs = wp.getCosAngs(thisSliceCoords, OWCoords, boxDims, highCut=shellCut) thisAngHist, thisAngBins = np.histogram(thisAngs, bins=nAngBins, range=[0.0, 180.0], density=False) angHists[:, j] += thisAngHist #Distance histograms with these slice oxygens as central oxygens thisDistHist = wl.pairdistancehistogram( thisSliceCoords, OWCoords, distBinWidth, nDistBins, boxDims) distHists[:, j] += thisDistHist #Now probe occupancies #Need to get random z locations within this slice thisGrid = randomGrid + surfaceNorms * zSliceLocs[j] thisNum = wl.probegrid(np.vstack((OWCoords, surfCoords)), thisGrid, probeRadius, boxDims) thisProbeHist, thisProbeBins = np.histogram(thisNum, bins=probeBins, density=False) probeHists[:, j] += thisProbeHist if len(thisSliceCoordsMean) > 0: thisAngsMean, thisNumAngsMean = wp.getCosAngs( thisSliceCoordsMean, OWCoords, boxDims, highCut=shellCut) thisAngHistMean, thisAngBinsMean = np.histogram( thisAngsMean, bins=nAngBins, range=[0.0, 180.0], density=False) angHistsMean[:, j] += thisAngHistMean thisDistHistMean = wl.pairdistancehistogram( thisSliceCoordsMean, OWCoords, distBinWidth, nDistBins, boxDims) distHistsMean[:, j] += thisDistHistMean thisGridMean = randomGridMean + surfaceNormsMean * zSliceLocs[j] thisNumMean = wl.probegrid(np.vstack((OWCoords, surfCoords)), thisGridMean, probeRadius, boxDims) thisProbeHistMean, thisProbeBinsMean = np.histogram( thisNumMean, bins=probeBins, density=False) probeHistsMean[:, j] += thisProbeHistMean #Done looping over trajectories, etc. #Now finish computing some quantities by averaging appropriately interfaceZmean /= float(len(simDirs)) interfaceZ /= totFrames interfaceZSq /= totFrames watDensFine /= (totFrames * boxDims[0] * boxDims[1] * zGridSize) watDensFineMean /= (totFrames * boxDims[0] * boxDims[1] * zGridSize) surfDensFine /= (totFrames * boxDims[0] * boxDims[1] * zGridSize) #Just let the histograms be histograms... can normalize later if need to (don't think numbers will get too big) #probeHists /= totFrames #angHists /= totFrames #distHists /= totFrames #probeHistsMean /= totFrames #angHistsMean /= totFrames #distHistsMean /= totFrames #Now save everything to files #For 2D stuff (i.e. 
interfaces), use netCDF4 format outdata = Dataset("interface_Z.nc", "w", format="NETCDF4", zlib=True) #outdata.description("Interface heights (in the z-direction from surface atoms) and fluctuations for both mean and instantaneous interfaces.") #outdata.history = "Created " + time.ctime(time.time()) xdim = outdata.createDimension("XGridPoints", xSize) ydim = outdata.createDimension("YGridPoints", ySize) outXLocs = outdata.createVariable("XGridPoints", "f8", ("XGridPoints", )) outXLocs.units = "Angstroms" outYLocs = outdata.createVariable("YGridPoints", "f8", ("YGridPoints", )) outYLocs.units = "Angstroms" outIntZ = outdata.createVariable("InterfaceHeightInstant", "f8", ( "XGridPoints", "YGridPoints", )) outIntZ.units = "Angstroms relative to SU surface atoms" outIntZ[:, :] = interfaceZ outIntZMean = outdata.createVariable("InterfaceHeightMean", "f8", ( "XGridPoints", "YGridPoints", )) outIntZMean.units = "Angstroms relative to SU surface atoms" outIntZMean[:, :] = interfaceZmean outIntZSq = outdata.createVariable("InterfaceHeightInstantSq", "f8", ( "XGridPoints", "YGridPoints", )) outIntZSq.units = "Squared Angstroms relative to SU surface atoms" outIntZSq[:, :] = interfaceZSq outdata.close() #For everything else, just use text files, even though some may be a little big (i.e. angles and pair distances) np.savetxt( 'z-densities.txt', np.vstack( (zGridLocsFine, surfDensFine, watDensFineMean, watDensFine)).T, header= 'Distance normal to interface (A) Surface heavy atom density (1/A^3) Water oxygen density mean interface (at %f) Water oxygen density instantaneous interface' % interfaceZmean[0, 0]) np.savetxt( 'probe_hists_mean.txt', np.hstack((np.array([probeBins[:-1]]).T, probeHistsMean)), header= 'Number waters in probe histograms at following distances from mean interface: %s' % str(zSliceLocs)) np.savetxt( 'probe_hists_instant.txt', np.hstack((np.array([probeBins[:-1]]).T, probeHists)), header= 'Number waters in probe histograms at following distances from instantaneous interface: %s' % str(zSliceLocs)) np.savetxt( 'ang3b_hists_mean.txt', np.hstack((np.array([angBinCents]).T, angHistsMean)), header= '3-body angle histograms at following distances from mean interface: %s' % str(zSliceLocs)) np.savetxt( 'ang3b_hists_instant.txt', np.hstack((np.array([angBinCents]).T, angHists)), header= '3-body angle histograms at following distances from instantaneous interface: %s' % str(zSliceLocs)) np.savetxt( 'pair_hists_mean.txt', np.hstack((np.array([distBinCents]).T, distHistsMean)), header= 'O-O pair-distance histograms at following distances from mean interface: %s' % str(zSliceLocs)) np.savetxt( 'pair_hists_instant.txt', np.hstack((np.array([distBinCents]).T, distHists)), header= 'O-O pair-distance histograms at following distances from instantaneous interface: %s' % str(zSliceLocs)) print time.ctime(time.time())
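#A minimal usage sketch for reading back interface_Z.nc written above (assumes only the variable
#names created above); this helper is not called by any of these scripts.
def loadInterfaceHeights(fname='interface_Z.nc'):
    """Return mean interface heights, instantaneous interface heights, and height fluctuations."""
    dat = Dataset(fname, 'r')
    zMean = np.array(dat.variables['InterfaceHeightMean'][:, :])
    zInst = np.array(dat.variables['InterfaceHeightInstant'][:, :])
    zInstSq = np.array(dat.variables['InterfaceHeightInstantSq'][:, :])
    dat.close()
    #Fluctuations at each x-y bin follow from <z^2> - <z>^2
    return zMean, zInst, zInstSq - zInst**2
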
def main(args): #Get the structure, topology, and trajectory files from the command line #ParmEd accepts a wide range of file types (Amber, GROMACS, CHARMM, OpenMM... but not LAMMPS) try: topFile = args[0] strucFile = args[1] trajFile = args[2] except IndexError: print( "Specify topology, structure, and trajectory files from the command line." ) print(Usage) sys.exit(2) #And also allow user to specify start frame, but default to zero if no input try: startFrame = int(args[3]) except IndexError: startFrame = 0 #And get information on whether or not to use a restraint try: boolstr = args[4] if boolstr.lower() == 'true' or boolstr.lower() == 'yes': restraintBool = True else: restraintBool = False except IndexError: restraintBool = False print("Using topology file: %s" % topFile) print("Using structure file: %s" % strucFile) print("Using trajectory file: %s" % trajFile) print("\nSetting up system...") #Load in the files for initial simulations top = pmd.load_file(topFile) struc = pmd.load_file(strucFile) #Transfer unit cell information to topology object top.box = struc.box[:] #Set up some global features to use in all simulations temperature = 298.15 * u.kelvin #Define the platform (i.e. hardware and drivers) to use for running the simulation #This can be CUDA, OpenCL, CPU, or Reference #CUDA is for NVIDIA GPUs #OpenCL is for CPUs or GPUs, but must be used for old CPUs (not SSE4.1 compatible) #CPU only allows single precision (CUDA and OpenCL allow single, mixed, or double) #Reference is a clear, stable reference for other code development and is very slow, using double precision by default platform = mm.Platform.getPlatformByName('CUDA') prop = { #'Threads': '1', #number of threads for CPU - all definitions must be strings (I think) 'Precision': 'mixed', #for CUDA or OpenCL, select the precision (single, mixed, or double) 'DeviceIndex': '0', #selects which GPUs to use - set this to zero if using CUDA_VISIBLE_DEVICES 'DeterministicForces': 'True' #Makes sure forces with CUDA and PME are deterministic } #Create the OpenMM system that can be used as a reference systemRef = top.createSystem( nonbondedMethod=app. 
PME, #Uses PME for long-range electrostatics, simple cut-off for LJ nonbondedCutoff=12.0 * u.angstroms, #Defines cut-off for non-bonded interactions rigidWater=True, #Use rigid water molecules constraints=app.HBonds, #Constrains all bonds involving hydrogens flexibleConstraints= False, #Whether to include energies for constrained DOFs removeCMMotion= True, #Whether or not to remove COM motion (don't want to if part of system frozen) ) #Set up the integrator to use as a reference integratorRef = mm.LangevinIntegrator( temperature, #Temperature for Langevin 1.0 / u.picoseconds, #Friction coefficient 2.0 * u.femtoseconds, #Integration timestep ) integratorRef.setConstraintTolerance(1.0E-08) #Get solute atoms and solute heavy atoms separately soluteIndices = [] heavyIndices = [] for res in top.residues: if res.name not in ['OTM', 'CTM', 'STM', 'NTM', 'SOL']: for atom in res.atoms: soluteIndices.append(atom.idx) if 'H' not in atom.name[0]: heavyIndices.append(atom.idx) #If working with expanded ensemble simulation of solute near the interface, need to include restraint to keep close if restraintBool: #Also get surface SU atoms and surface CU atoms at top and bottom of surface surfIndices = [] for atom in top.atoms: if atom.type == 'SU': surfIndices.append(atom.idx) print("\nSolute indices: %s" % str(soluteIndices)) print("Solute heavy atom indices: %s" % str(heavyIndices)) print("Surface SU atom indices: %s" % str(surfIndices)) #Will now add a custom bonded force between heavy atoms of each solute and surface SU atoms #Should be in units of kJ/mol*nm^2, but should check this #Also, note that here we are using a flat-bottom restraint to keep close to surface #AND to keep from penetrating into surface when it's in the decoupled state refZlo = 1.4 * u.nanometer #in nm, the distance between the SU atoms and the solute centroid refZhi = 1.7 * u.nanometer restraintExpression = '0.5*k*step(refZlo - (z2 - z1))*(((z2 - z1) - refZlo)^2)' restraintExpression += '+ 0.5*k*step((z2 - z1) - refZhi)*(((z2 - z1) - refZhi)^2)' restraintForce = mm.CustomCentroidBondForce(2, restraintExpression) restraintForce.addPerBondParameter('k') restraintForce.addPerBondParameter('refZlo') restraintForce.addPerBondParameter('refZhi') restraintForce.addGroup(surfIndices, np.ones( len(surfIndices))) #Don't weight with masses #To assign flat-bottom restraint correctly, need to know if each solute is above or below interface #Will need surface z-positions for this suZpos = np.average(struc.coordinates[surfIndices, 2]) restraintForce.addGroup(heavyIndices, np.ones(len(heavyIndices))) solZpos = np.average(struc.coordinates[heavyIndices, 2]) if (solZpos - suZpos) > 0: restraintForce.addBond([0, 1], [10000.0, refZlo, refZhi]) else: #A little confusing, but have to negate and switch for when z2-z1 is always going to be negative restraintForce.addBond([0, 1], [10000.0, -refZhi, -refZlo]) systemRef.addForce(restraintForce) #And define lambda states of interest lambdaVec = np.array( #electrostatic lambda - 1.0 is fully interacting, 0.0 is non-interacting [ [ 1.00, 0.75, 0.50, 0.25, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00 ], #LJ lambdas - 1.0 is fully interacting, 0.0 is non-interacting [ 1.00, 1.00, 1.00, 1.00, 1.00, 0.90, 0.80, 0.70, 0.60, 0.50, 0.40, 0.35, 0.30, 0.25, 0.20, 0.15, 0.10, 0.05, 0.00 ] ]) #We need to add a custom non-bonded force for the solute being alchemically changed #Will be helpful to have handle on non-bonded force handling LJ and coulombic interactions NBForce = 
None for frc in systemRef.getForces(): if (isinstance(frc, mm.NonbondedForce)): NBForce = frc #Turn off dispersion correction since have interface NBForce.setUseDispersionCorrection(False) #Separate out alchemical and regular particles using set objects alchemicalParticles = set(soluteIndices) chemicalParticles = set(range( systemRef.getNumParticles())) - alchemicalParticles #Define the soft-core function for turning on/off LJ interactions #In energy expressions for CustomNonbondedForce, r is a special variable and refers to the distance between particles #All other variables must be defined somewhere in the function. #The exception are variables like sigma1 and sigma2. #It is understood that a parameter will be added called 'sigma' and that the '1' and '2' are to specify the combining rule. #Have also added parameter to switch the soft-core interaction to a WCA potential softCoreFunctionWCA = '(step(x-0.5))*(4.0*lambdaLJ*epsilon*x*(x-1.0) + (1.0-lambdaWCA)*lambdaLJ*epsilon) ' softCoreFunctionWCA += '+ (1.0 - step(x-0.5))*lambdaWCA*(4.0*lambdaLJ*epsilon*x*(x-1.0));' softCoreFunctionWCA += 'x = (1.0/reff_sterics);' softCoreFunctionWCA += 'reff_sterics = (0.5*(1.0-lambdaLJ) + ((r/sigma)^6));' softCoreFunctionWCA += 'sigma=0.5*(sigma1+sigma2); epsilon = sqrt(epsilon1*epsilon2)' #Define the system force for this function and its parameters SoftCoreForceWCA = mm.CustomNonbondedForce(softCoreFunctionWCA) SoftCoreForceWCA.addGlobalParameter( 'lambdaLJ', 1.0 ) #Throughout, should follow convention that lambdaLJ=1.0 is fully-interacting state SoftCoreForceWCA.addGlobalParameter( 'lambdaWCA', 1.0) #When 1, attractions included; setting to 0 turns off attractions SoftCoreForceWCA.addPerParticleParameter('sigma') SoftCoreForceWCA.addPerParticleParameter('epsilon') #Will turn off electrostatics completely in the original non-bonded force #In the end-state, only want electrostatics inside the alchemical molecule #To do this, just turn ON a custom force as we turn OFF electrostatics in the original force ONE_4PI_EPS0 = 138.935456 #in kJ/mol nm/e^2 soluteCoulFunction = '(1.0-(lambdaQ^2))*ONE_4PI_EPS0*charge/r;' soluteCoulFunction += 'ONE_4PI_EPS0 = %.16e;' % (ONE_4PI_EPS0) soluteCoulFunction += 'charge = charge1*charge2' SoluteCoulForce = mm.CustomNonbondedForce(soluteCoulFunction) #Note this lambdaQ will be different than for soft core (it's also named differently, which is CRITICAL) #This lambdaQ corresponds to the lambda that scales the charges to zero #To turn on this custom force at the same rate, need to multiply by (1.0-lambdaQ**2), which we do SoluteCoulForce.addGlobalParameter('lambdaQ', 1.0) SoluteCoulForce.addPerParticleParameter('charge') #Also create custom force for intramolecular alchemical LJ interactions #Could include with electrostatics, but nice to break up #We could also do this with a separate NonbondedForce object, but it would be a little more work, actually soluteLJFunction = '4.0*epsilon*x*(x-1.0); x = (sigma/r)^6;' soluteLJFunction += 'sigma=0.5*(sigma1+sigma2); epsilon=sqrt(epsilon1*epsilon2)' SoluteLJForce = mm.CustomNonbondedForce(soluteLJFunction) SoluteLJForce.addPerParticleParameter('sigma') SoluteLJForce.addPerParticleParameter('epsilon') #Loop over all particles and add to custom forces #As we go, will also collect full charges on the solute particles #AND we will set up the solute-solute interaction forces alchemicalCharges = [[0]] * len(soluteIndices) for ind in range(systemRef.getNumParticles()): #Get current parameters in non-bonded force [charge, sigma, epsilon] = 
        NBForce.getParticleParameters(ind)

        #Make sure that sigma is not set to zero! Fine for some ways of writing LJ energy, but NOT OK for soft-core!
        if sigma / u.nanometer == 0.0:
            newsigma = 0.3 * u.nanometer #This 0.3 is what's used by GROMACS as a default value for sc-sigma
        else:
            newsigma = sigma

        #Add the particle to the soft-core force (do for ALL particles)
        SoftCoreForceWCA.addParticle([newsigma, epsilon])

        #Also add the particle to the solute only forces
        SoluteCoulForce.addParticle([charge])
        SoluteLJForce.addParticle([sigma, epsilon])

        #If the particle is in the alchemical molecule, need to set its LJ interactions to zero in the original force
        if ind in soluteIndices:
            NBForce.setParticleParameters(ind, charge, sigma, epsilon * 0.0)
            #And keep track of full charge so we can scale it right by lambda
            alchemicalCharges[soluteIndices.index(ind)] = charge

    #Now we need to handle exceptions carefully
    for ind in range(NBForce.getNumExceptions()):
        [p1, p2, excCharge, excSig, excEps] = NBForce.getExceptionParameters(ind)
        #For consistency, must add exclusions where we have exceptions for custom forces
        SoftCoreForceWCA.addExclusion(p1, p2)
        SoluteCoulForce.addExclusion(p1, p2)
        SoluteLJForce.addExclusion(p1, p2)

    #Only compute interactions between the alchemical and other particles for the soft-core force
    SoftCoreForceWCA.addInteractionGroup(alchemicalParticles, chemicalParticles)

    #And only compute alchemical/alchemical interactions for other custom forces
    SoluteCoulForce.addInteractionGroup(alchemicalParticles, alchemicalParticles)
    SoluteLJForce.addInteractionGroup(alchemicalParticles, alchemicalParticles)

    #Set other soft-core parameters as needed
    SoftCoreForceWCA.setCutoffDistance(12.0 * u.angstroms)
    SoftCoreForceWCA.setNonbondedMethod(mm.CustomNonbondedForce.CutoffPeriodic)
    SoftCoreForceWCA.setUseLongRangeCorrection(False)
    systemRef.addForce(SoftCoreForceWCA)

    #Set other parameters as needed - note that for the solute force would like to set no cutoff
    #However, OpenMM won't allow a bunch of potentials with cutoffs then one without...
    #So as long as the solute is smaller than the cut-off, won't have any problems!
SoluteCoulForce.setCutoffDistance(12.0 * u.angstroms)
SoluteCoulForce.setNonbondedMethod(mm.CustomNonbondedForce.CutoffPeriodic)
SoluteCoulForce.setUseLongRangeCorrection(False)
systemRef.addForce(SoluteCoulForce)

SoluteLJForce.setCutoffDistance(12.0 * u.angstroms)
SoluteLJForce.setNonbondedMethod(mm.CustomNonbondedForce.CutoffPeriodic)
SoluteLJForce.setUseLongRangeCorrection(False)
systemRef.addForce(SoluteLJForce)

#Need to add integrator and context in order to evaluate potential energies
#Integrator is arbitrary because we won't use it
integrator = mm.VerletIntegrator(1.0 * u.femtoseconds)
context = mm.Context(systemRef, integrator, platform, prop)

##########################################################################

print("\nStarting analysis...")

kBT = u.AVOGADRO_CONSTANT_NA * u.BOLTZMANN_CONSTANT_kB * temperature

#Set up arrays to hold potential energies
#Columns will be coupled, then no electrostatics, then no electrostatics with WCA, then decoupled
allU = np.array([[]] * 4).T

#We've now set everything up like we're going to run a simulation
#But instead we will use pytraj to load a trajectory to get coordinates
#With those coordinates, we will evaluate the energies we want
#Just need to figure out if we have a surface or a bulk system
trajFiles = glob.glob('Quad*/%s' % trajFile)
if len(trajFiles) == 0:
    trajFiles = [trajFile]

print("Using following trajectory files: %s" % str(trajFiles))

for aFile in trajFiles:

    trajtop = copy.deepcopy(top)
    trajtop.rb_torsions = pmd.TrackedList([]) #Necessary for SAM systems so doesn't break pytraj
    trajtop = pt.load_parmed(trajtop, traj=False)
    traj = pt.iterload(aFile, top=trajtop, frame_slice=(startFrame, -1))

    thisAllU = np.zeros((len(traj), 4))

    #Loop over the lambda states of interest, looping over the whole trajectory each time
    for i, lstate in enumerate([[1.0, 1.0, 1.0],    #Fully coupled
                                [1.0, 1.0, 0.0],    #Charge turned off
                                [1.0, 0.0, 0.0],    #Charge turned off, attractions turned off (so WCA)
                                [0.0, 1.0, 0.0]]):  #Decoupled (WCA still included, though doesn't matter)

        #Set the lambda state
        context.setParameter('lambdaLJ', lstate[0])
        context.setParameter('lambdaWCA', lstate[1])
        context.setParameter('lambdaQ', lstate[2])
        for k, ind in enumerate(soluteIndices):
            [charge, sig, eps] = NBForce.getParticleParameters(ind)
            NBForce.setParticleParameters(ind, alchemicalCharges[k] * lstate[2], sig, eps)
        NBForce.updateParametersInContext(context)

        #And loop over trajectory
        for t, frame in enumerate(traj):

            thisbox = np.array(frame.box.values[:3])
            context.setPeriodicBoxVectors(np.array([thisbox[0], 0.0, 0.0]) * u.angstrom,
                                          np.array([0.0, thisbox[1], 0.0]) * u.angstrom,
                                          np.array([0.0, 0.0, thisbox[2]]) * u.angstrom)

            thispos = np.array(frame.xyz) * u.angstrom
            context.setPositions(thispos)

            thisAllU[t, i] = context.getState(getEnergy=True).getPotentialEnergy() / kBT

    #Add this trajectory's information
    allU = np.vstack((allU, thisAllU))

#And that should be it, just need to save files and print information
#avgUq = np.average(allU[:, 0] - allU[:, 1])
#stdUq = np.std(allU[:, 0] - allU[:, 1], ddof=1)
#avgUlj = np.average(allU[:, 1] - allU[:, 2])
#stdUlj = np.std(allU[:, 1] - allU[:, 2], ddof=1)
#print("\nAverage solute-water electrostatic potential energy: %f +/- %f"%(avgUq, stdUq))
#print("Average solute-water LJ potential energy: %f +/- %f"%(avgUlj, stdUlj))

np.savetxt('pot_energy_decomp.txt', allU,
           header='U_coupled (kBT)    U_noQ (kBT)    U_noQ_WCA (kBT)    U_decoupled (kBT)')

#Print some meaningful information
#Just make sure we do so as accurately as possible using alchemical information
alchFile = glob.glob('alchemical_U*.txt')[0]
print('Using alchemical information file: %s' % alchFile)
printInfo(allU, mbarFile='mbar_object.pkl', alchFile=alchFile)
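#--- Illustrative aside (not part of the original scripts) ---------------------
#The commented-out lines above sketch an electrostatic/LJ decomposition of the
#solute-water interaction energy. A minimal way to get those numbers back out of
#pot_energy_decomp.txt is shown below; the column order follows the header written
#above, the averages are plain (unweighted) frame averages, and the error estimate
#ignores time correlation, so treat this as a rough check rather than the
#MBAR-weighted result that printInfo presumably reports.
import numpy as np

allU = np.loadtxt('pot_energy_decomp.txt')  #columns: coupled, noQ, noQ_WCA, decoupled (kBT)
dUq = allU[:, 0] - allU[:, 1]   #solute-water electrostatics
dUlj = allU[:, 1] - allU[:, 2]  #attractive part of solute-water LJ (noQ minus WCA)
print("Average solute-water electrostatic energy (kBT): %f +/- %f"
      % (np.average(dUq), np.std(dUq, ddof=1) / np.sqrt(len(dUq))))
print("Average solute-water LJ attraction (kBT): %f +/- %f"
      % (np.average(dUlj), np.std(dUlj, ddof=1) / np.sqrt(len(dUlj))))
#--------------------------------------------------------------------------------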
def merge(self):
    def dummify(atom):
        atom.atom_type.epsilon = 0
        atom.atom_type.charge = 0
        atom.atom_type.rmin = 0
        atom.atom_type.name = "du"
        atom.atom_type.atomic_number = 0
        atom.charge = 0
        atom.name = "du"
        atom.type = "du"
        return atom

    self.map = []
    N = 0
    for r, (res1, res2) in enumerate(zip(self.system1.residues, self.system2.residues)):
        incr = 0
        N += len(res1.atoms)
        map = {}
        for i, atom1 in enumerate(res1.atoms):
            for j, atom2 in enumerate(res2.atoms):
                if atom1.xx == atom2.xx and atom1.xy == atom2.xy and atom1.xz == atom2.xz:
                    map[i] = j
                    break
                if j == len(self.system2.residues[r]) - 1:
                    du = _copy.deepcopy(atom1)
                    dummify(du)
                    self.system2.add_atom(du, res2.name, res2.number)
                    map[i] = N + incr
                    incr += 1
                    break

        rev_map = lambda x: list(map.keys())[list(map.values()).index(x)]

        for j, atom2 in enumerate(res2.atoms):
            try:
                rev_map(j)
            except ValueError:
                # calling the property enables index update - this seemingly pointless line is needed
                du = _copy.deepcopy(atom2)
                dummify(du)
                self.system1.add_atom(du, res1.name, res1.number)
                self.system1.atoms[-1].idx
                map[len(res1.atoms) - 1] = j

        for j, atom2 in enumerate(res2.atoms):
            # calling the property enables index update - these two seemingly pointless lines are needed
            atom2.idx
            atom2._idx = rev_map(j)
            atom2.idx

        res2.sort()

    self.system1.atoms.sort()
    self.system2.atoms.sort()

    # bonds - needed since parmed doesn't write nonbonded atoms
    bonds_1 = {tuple(sorted([x.atom1.idx, x.atom2.idx])): x.type for x in self.system1.bonds}
    bonds_2 = {tuple(sorted([x.atom1.idx, x.atom2.idx])): x.type for x in self.system2.bonds}
    common_keys = set(bonds_1.keys()).intersection(bonds_2.keys())
    unique_bonds_1 = set(bonds_1.keys()) - common_keys
    unique_bonds_2 = set(bonds_2.keys()) - common_keys

    for i, j in ((1, 2), (2, 1)):
        exec("bonds_i, bonds_j = bonds_%d, bonds_%d" % (i, j), locals(), globals())
        exec("unique_bonds_i = unique_bonds_%d" % i, locals(), globals())
        exec("system_j = self.system%d" % j, locals(), globals())
        for key in unique_bonds_i:
            val = bonds_i[key]
            if val not in system_j.bond_types:
                bond_type = _pmd.BondType(val.k, val.req, system_j.bond_types)
                system_j.bond_types.append(bond_type)
                bonds_j[key] = system_j.bond_types[-1]
            else:
                bonds_j[key] = system_j.bond_types[system_j.bond_types.index(val)]
        system_j.bonds = bonds_j
        system_j.bonds = _pmd.TrackedList([
            _pmd.Bond(system_j.atoms[m], system_j.atoms[n], type=t)
            for (m, n), t in sorted(bonds_j.items())
        ])
'''
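#--- Illustrative aside (not part of merge()) -----------------------------------
#merge() pairs atoms of two topologies by exact coordinate match and pads unmatched
#atoms with zero-parameter dummies. The mapping idea, stripped of the ParmEd
#bookkeeping, is just this (the function name and example data are illustrative only):
def match_by_coords(coords1, coords2):
    """Return {index in coords1: index in coords2} for identical coordinates;
    unmatched atoms map to None (these are the ones merge() replaces with dummies)."""
    mapping = {}
    for i, xyz1 in enumerate(coords1):
        for j, xyz2 in enumerate(coords2):
            if xyz1 == xyz2:
                mapping[i] = j
                break
        else:
            mapping[i] = None
    return mapping

#print(match_by_coords([(0.0, 0.0, 0.0), (1.0, 0.0, 0.0)], [(1.0, 0.0, 0.0)]))
#-> {0: None, 1: 0}
#--------------------------------------------------------------------------------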
def main(args):
    #Get the structure and topology files from the command line
    #ParmEd accepts a wide range of file types (Amber, GROMACS, CHARMM, OpenMM... but not LAMMPS)
    try:
        topFile = args[0]
        strucFile = args[1]
    except IndexError:
        print("Specify topology and structure files from the command line.")
        sys.exit(2)

    print("Using topology file: %s" % topFile)
    print("Using structure file: %s" % strucFile)
    print("\nSetting up system...")

    #Load in the files for initial simulations
    top = pmd.load_file(topFile)
    struc = pmd.load_file(strucFile)

    #Transfer unit cell information to topology object
    top.box = struc.box[:]

    #Set up some global features to use in all simulations
    temperature = 298.15 * u.kelvin

    #Define the platform (i.e. hardware and drivers) to use for running the simulation
    #This can be CUDA, OpenCL, CPU, or Reference
    #CUDA is for NVIDIA GPUs
    #OpenCL is for CPUs or GPUs, but must be used for old CPUs (not SSE4.1 compatible)
    #CPU only allows single precision (CUDA and OpenCL allow single, mixed, or double)
    #Reference is a clear, stable reference for other code development and is very slow, using double precision by default
    platform = mm.Platform.getPlatformByName('CUDA')
    prop = {
        #'Threads': '2', #number of threads for CPU - all definitions must be strings (I think)
        'Precision': 'mixed', #for CUDA or OpenCL, select the precision (single, mixed, or double)
        'DeviceIndex': '0', #selects which GPUs to use - set this to zero if using CUDA_VISIBLE_DEVICES
        'DeterministicForces': 'True' #Makes sure forces with CUDA and PME are deterministic
    }

    #Create the OpenMM system that can be used as a reference
    systemRef = top.createSystem(
        nonbondedMethod=app.PME, #Uses PME for long-range electrostatics, simple cut-off for LJ
        nonbondedCutoff=12.0 * u.angstroms, #Defines cut-off for non-bonded interactions
        rigidWater=True, #Use rigid water molecules
        constraints=app.HBonds, #Constrains all bonds involving hydrogens
        flexibleConstraints=False, #Whether to include energies for constrained DOFs
        removeCMMotion=False, #Whether or not to remove COM motion (don't want to if part of system is frozen)
    )

    #Set up the integrator to use as a reference
    integratorRef = mm.LangevinIntegrator(
        temperature, #Temperature for Langevin
        1.0 / u.picoseconds, #Friction coefficient
        2.0 * u.femtoseconds, #Integration timestep
    )
    integratorRef.setConstraintTolerance(1.0E-08)

    #To freeze atoms, set mass to zero (does not apply to virtual sites, termed "extra particles" in OpenMM)
    #Here assume (correctly, I think) that the topology indices for atoms correspond to those in the system
    for i, atom in enumerate(top.atoms):
        if atom.type in ('SU',): #, 'CU', 'CUO'):
            systemRef.setParticleMass(i, 0 * u.dalton)

    #Track non-bonded force, mainly to turn off dispersion correction
    NBForce = None
    for frc in systemRef.getForces():
        if (isinstance(frc, mm.NonbondedForce)):
            NBForce = frc

    #Turn off dispersion correction since have interface
    NBForce.setUseDispersionCorrection(False)

    #Get solute atoms and solute heavy atoms separately
    soluteIndices = []
    heavyIndices = []
    for res in top.residues:
        if res.name not in ['OTM', 'CTM', 'STM', 'NTM', 'SOL']:
            for atom in res.atoms:
                soluteIndices.append(atom.idx)
                if 'H' not in atom.name[0]:
                    heavyIndices.append(atom.idx)

    #JUST for boric acid, add a custom bonded force
    #Couldn't find a nice, compatible force field, but did find one, so using it
    #But it has no angle terms on O-B-O and instead a weird bond repulsion term
    #This term also prevents out-of-plane bending
    #Simple in our case because boric acid is symmetric, so only need one parameter
    #Parameters come from Otkidach and Pletnev, 2001
    #Here, Ad = (A^2) / (d^6) since Ai and Aj and di and dj are all the same
    #In the original paper, the B-OH bond had A = 1.72 and d = 0.354
    #Note that d is dimensionless and A should have units of (Angstrom^3)*(kcal/mol)^(1/2)
    #These units are inferred just to make things work out with kcal/mol and the given distance dependence
    bondRepulsionFunction = 'Ad*(1.0/r)^6'
    BondRepulsionForce = mm.CustomBondForce(bondRepulsionFunction)
    BondRepulsionForce.addPerBondParameter('Ad') #Units are technically kJ/mol * nm^6
    baOxInds = []
    for aind in soluteIndices:
        if top.atoms[aind].type == 'oh':
            baOxInds.append(aind)
    for i in range(len(baOxInds)):
        for j in range(i + 1, len(baOxInds)):
            BondRepulsionForce.addBond(baOxInds[i], baOxInds[j], [0.006289686])
    systemRef.addForce(BondRepulsionForce)

    #Also get surface SU atoms
    surfIndices = []
    for atom in top.atoms:
        if atom.type == 'SU':
            surfIndices.append(atom.idx)

    startPos = np.array(struc.positions.value_in_unit(u.nanometer))

    print("\nSolute indices: %s" % str(soluteIndices))
    print("Solute heavy atom indices: %s" % str(heavyIndices))
    print("Surface SU atom indices: %s" % str(surfIndices))

    #Solute should already be placed far from the surface
    #If this is not done right, or it is too close to half the periodic box distance, will have issues
    #Either way, set this as the starting reference z distance
    initRefZ = np.average(startPos[heavyIndices, 2]) - np.average(startPos[surfIndices, 2])
    print(initRefZ)

    #Will now add a custom bonded force between solute heavy atoms and surface SU atoms
    #The force constant k should be in units of kJ/mol*nm^2, but should check this
    #For expanded ensemble, fine to assume solute is less than half the box distance from the surface
    #But for umbrella sampling, want to apply pulling towards the surface regardless of whether we pull from above or below
    #Also allows us to get further from the surface with our umbrellas without worrying about PBCs
    restraintExpression = '0.5*k*(abs(z2 - z1) - refZ)^2'
    restraintForce = mm.CustomCentroidBondForce(2, restraintExpression)
    restraintForce.addPerBondParameter('k')
    restraintForce.addGlobalParameter('refZ', initRefZ) #Make global so can modify during simulation
    restraintForce.addGroup(surfIndices, np.ones(len(surfIndices))) #Don't weight with masses
    restraintForce.addGroup(heavyIndices, np.ones(len(heavyIndices)))
    restraintForce.addBond([0, 1], [1000.0])
    restraintForce.setUsesPeriodicBoundaryConditions(True) #Only when doing umbrella sampling
    systemRef.addForce(restraintForce)

    forceLabelsRef = getForceLabels(systemRef)

    decompEnergy(systemRef, struc.positions, labels=forceLabelsRef, verbose=False)

    #Do NVT simulation
    stateFileNVT1, stateNVT1 = doSimNVT(top, systemRef, integratorRef, platform, prop,
                                        temperature, pos=struc.positions)

    #Do pulling simulation - really I'm just slowly changing the equilibrium bond distance between the surface and the solute
    stateFilePull, statePull = doSimPull(top, systemRef, integratorRef, platform, prop,
                                         temperature, state=stateFileNVT1)

    decompEnergy(systemRef, statePull, labels=forceLabelsRef, verbose=False)

    #Load in pulling restraint data and pulling trajectory to identify starting structures for each umbrella
    pullData = np.loadtxt('pull_restraint.txt')
    trajtop = copy.deepcopy(top)
    trajtop.rb_torsions = pmd.TrackedList([])
    trajtop = pt.load_parmed(trajtop, traj=False)
    pulltraj = pt.iterload('pull.nc', trajtop)
    frameTimes = np.array([frame.time for frame in pulltraj])

    #Define some umbrella distances based on a fixed spacing between the initial and final pulling coordinate
    #Actually for the final pulling coordinate, close to the surface, using the average of the actual coordinate and the reference
    zSpace = 0.1 #nm
    zUmbs = np.arange(0.5 * (pullData[-1, 1] + pullData[-1, 2]), pullData[0, 2], zSpace)

    print("\nUsing following umbrellas:")
    print(zUmbs)

    #Then loop over umbrellas and run a simulation for each
    for i, zRefDist in enumerate(zUmbs):

        os.mkdir("umbrella%i" % i)
        os.chdir("umbrella%i" % i)

        #Find where in the pulling trajectory the solute came closest to this umbrella
        pullDatInd = np.argmin(abs(pullData[:, 2] - zRefDist))
        frameInd = np.argmin(abs(frameTimes - pullData[pullDatInd, 0]))

        #Get starting coordinates
        #Making sure to assign the units that will be returned by pytraj
        thisCoords = np.array(pulltraj[frameInd].xyz) * u.angstrom

        #Set reference value for harmonic force
        restraintForce.setGlobalParameterDefaultValue(0, zRefDist)

        print("\nUmbrella %i:" % i)
        print("\tReference distance: %f" % zRefDist)
        print("\tFrame chosen from trajectory: %i (%f ps)" % (frameInd, frameTimes[frameInd]))

        #And run simulations, first equilibrating in NVT, then NPT, then production in NPT
        stateFileNVT, stateNVT = doSimNVT(top, systemRef, integratorRef, platform, prop,
                                          temperature, pos=thisCoords)

        stateFileNPT, stateNPT = doSimNPT(top, systemRef, integratorRef, platform, prop,
                                          temperature, state=stateFileNVT)

        stateFileProd, stateProd = doSimUmbrella(top, systemRef, integratorRef, platform, prop,
                                                 temperature, state=stateFileNPT)

        os.chdir("../")
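#--- Illustrative aside (not part of the original script) ----------------------
#The CustomCentroidBondForce above applies the bias 0.5*k*(|z2 - z1| - refZ)^2 between
#the surface and solute centroids. A plain-Python version is handy when checking the
#restraint or when assembling inputs for WHAM/MBAR afterwards; the function name and
#example numbers below are illustrative only (distances in nm, k in kJ/mol/nm^2).
def umbrella_bias(z_solute, z_surface, refZ, k=1000.0):
    return 0.5 * k * (abs(z_solute - z_surface) - refZ)**2

#Example: solute centroid 2.3 nm above the surface centroid, window at refZ = 2.0 nm
#print(umbrella_bias(2.8, 0.5, 2.0))  #-> 0.5*1000*(0.3)^2 = 45 kJ/mol
#--------------------------------------------------------------------------------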
def main(args):

    print('\nReading in files and obtaining LJ information...')

    #First read in topology and trajectory
    topFile = args[0]
    trajFile = args[1]
    top = pmd.load_file(topFile)
    trajtop = copy.deepcopy(top)
    trajtop.rb_torsions = pmd.TrackedList([]) #Necessary for SAM systems so doesn't break pytraj
    trajtop = pt.load_parmed(trajtop, traj=False)
    traj = pt.iterload(trajFile, top=trajtop)

    #Next use parmed to get LJ parameters for all atoms in the solute, as well as water oxygens and surface atoms
    #While we go, also collect dictionaries of atomic indices associated with each atom type
    #Will have to check separately when looking at overlaps with the solute
    soluteLJ = []
    soluteInds = []
    dictOtherLJ = {}
    dictOtherInds = {}
    for res in top.residues:
        if res.name not in ['OTM', 'CTM', 'STM', 'NTM', 'SOL']:
            for atom in res.atoms:
                soluteInds.append(atom.idx)
                soluteLJ.append([atom.sigma, atom.epsilon])
        else:
            for atom in res.atoms:
                if not atom.type in dictOtherInds:
                    dictOtherInds[atom.type] = [atom.idx]
                else:
                    dictOtherInds[atom.type].append(atom.idx)
                if not atom.type in dictOtherLJ:
                    dictOtherLJ[atom.type] = np.array([atom.sigma, atom.epsilon])

    soluteLJ = np.array(soluteLJ)
    print(soluteLJ)

    #Use Lorentz-Berthelot combining rules to get LJ parameters between each solute atom and each non-solute atom type (including water oxygens)
    dictMixLJ = {}
    for i in range(soluteLJ.shape[0]):
        for akey in dictOtherLJ:
            dictMixLJ['%i_%s' % (i, akey)] = np.array([
                0.5 * (soluteLJ[i, 0] + dictOtherLJ[akey][0]),
                np.sqrt(soluteLJ[i, 1] * dictOtherLJ[akey][1])
            ])
    for key, val in dictMixLJ.iteritems():
        print("%s, %s" % (key, str(val.tolist())))

    print('\nDetermining hard-sphere radii for all combinations of solute and other system atoms...')

    #Next compute hard-sphere radii by integrating according to Barker and Henderson, Weeks, etc.
    #In order to do this right, technically using the WCA potential, not LJ
    hsRadii = {}
    rvals = np.arange(0.0, 50.005, 0.005)
    betaLJ = 1.0 / ((1.9872036E-03) * 298.15)
    for i in range(soluteLJ.shape[0]):
        for akey in dictOtherLJ:
            [thisSig, thisEps] = dictMixLJ['%i_%s' % (i, akey)]
            if thisEps == 0.0:
                hsRadii['%i_%s' % (i, akey)] = 0.0
                continue
            thisRmin = thisSig * (2.0**(1.0 / 6.0))
            thisSigdRmin6 = (thisSig / thisRmin)**6
            thisPotRmin = 4.0 * thisEps * ((thisSigdRmin6**2) - thisSigdRmin6)
            thisSigdR6 = (thisSig / rvals)**6
            thisPotVals = 4.0 * thisEps * ((thisSigdR6**2) - thisSigdR6) - thisPotRmin
            thisPotVals[np.where(rvals >= thisRmin)] = 0.0
            thisPotVals *= betaLJ
            thisIntegrand = 1.0 - np.exp(-thisPotVals)
            thisIntegrand[0] = 1.0
            hsRadii['%i_%s' % (i, akey)] = simps(thisIntegrand, rvals)
            #Need to multiply by two because will use atom centers to check overlap? Is Rhs the distance between centers?
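    #--- Illustrative aside (not part of the original script) -------------------
    #The loop above is a Barker-Henderson style estimate of an effective hard-sphere
    #diameter from the WCA repulsive part of each mixed LJ potential,
    #d = integral over r of [1 - exp(-beta*u_WCA(r))].
    #A minimal, self-contained version for a single sigma/epsilon pair is sketched
    #below; the function name and any example parameters are illustrative only
    #(sigma in Angstroms, epsilon in kcal/mol, matching ParmEd's units).
    def bh_diameter(sigma, epsilon, temperature=298.15):
        import numpy as np
        from scipy.integrate import simps
        beta = 1.0 / (1.9872036E-03 * temperature)  #1/(kB*T) with kB in kcal/mol/K
        r = np.arange(0.0, 50.005, 0.005)
        rmin = sigma * 2.0**(1.0 / 6.0)
        with np.errstate(divide='ignore', invalid='ignore'):
            sr6 = (sigma / r)**6
            u = 4.0 * epsilon * (sr6**2 - sr6) + epsilon  #LJ shifted up by epsilon (WCA form)
        u[r >= rmin] = 0.0  #purely repulsive: zero beyond the LJ minimum
        integrand = 1.0 - np.exp(-beta * u)
        integrand[0] = 1.0  #the integrand tends to 1 as r -> 0
        return simps(integrand, r)
    #Example (commented out so this aside does not change what the script does):
    #print(bh_diameter(3.4, 0.2))  #should come out within a few percent of sigma
    #-----------------------------------------------------------------------------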
    for key, val in hsRadii.iteritems():
        print("%s, %f" % (key, val))

    #Keep track of hard-sphere radii with water oxygens specially
    solOWhsRadii = np.zeros(soluteLJ.shape[0])
    for i in range(soluteLJ.shape[0]):
        solOWhsRadii[i] = hsRadii['%i_OW_tip4pew' % i] #Only using TIP4P/EW here

    print('\nStarting loop over trajectory...')

    #Now loop over trajectory and check if solute is overlapping with any waters OR surface atoms
    #Will create a distribution of overlapping atoms
    numOverlap = np.arange(101.0)
    countOverlap = np.zeros(len(numOverlap))
    #And also track average solute volume that we're trying to insert
    solVol = 0.0
    countFrames = 0
    print('')

    for frame in traj:
        if countFrames % 100 == 0:
            print("On frame %i" % countFrames)
        countFrames += 1
        boxDims = np.array(frame.box.values[:3])
        currCoords = np.array(frame.xyz)
        #Get solute coordinates and make sure solute is whole
        #Then get solute coordinates relative to first solute atom
        #Don't need any other wrapping because wl.nearneighbors will do its own wrapping
        solCoords = currCoords[soluteInds]
        solRefCoords = wl.reimage(solCoords, solCoords[0], boxDims) - solCoords[0]
        #While we have the coordinates nice, compute solute volume
        #Do this based on hard-sphere radii to water oxygens, which is the most likely case anyway
        solVol += np.sum(wl.spherevolumes(solRefCoords, solOWhsRadii, 0.1))
        #Will shift the solute first atom (and others too) to a number of random locations
        #Since applying restraint (if have surface), Z is drawn from the distribution we want
        #X and Y can be drawn more easily from uniform distributions, so do this to randomize solute position
        numRandXY = 1000
        randX = np.random.random(numRandXY) * boxDims[0]
        randY = np.random.random(numRandXY) * boxDims[1]
        #And will keep track of number of overlapping atoms for each solute random position
        thisTotOverlap = np.zeros(numRandXY, dtype=int)
        #Loop over all non-solute atoms in the system by atom type
        for akey, val in dictOtherInds.iteritems():
            #For this specific atom type, need to keep track of WHICH neighbors are overlapping
            #And need to do this for EACH solute atom
            #Don't want to double-count if two solute atoms both overlap the same other atom
            overlapBool = np.zeros((numRandXY, len(val)), dtype=int)
            #Now loop over each solute atom
            for i, coord in enumerate(solRefCoords):
                thisRadius = hsRadii['%i_%s' % (i, akey)]
                if thisRadius == 0.0:
                    continue
                #Define coordinates of the current solute atom we're working with
                #So setting first atom to random XY position, then shifting by distance to first atom
                hsCoords = np.zeros((numRandXY, 3))
                hsCoords[:, 0] = coord[0] + randX
                hsCoords[:, 1] = coord[1] + randY
                hsCoords[:, 2] = coord[2] + solCoords[0, 2]
                #Identify boolean for overlapping atoms and add to overall boolean for overlap
                #Note that want OR operation, so adding boolean arrays
                overlapBool += wl.nearneighbors(hsCoords, currCoords[val], boxDims, 0.0, thisRadius)
            #For this non-solute atom type, add number of atoms overlapping with ANY solute atom
            thisTotOverlap += np.sum(np.array(overlapBool, dtype=bool), axis=1)
        thisBins = np.arange(np.max(thisTotOverlap) + 1)
        countOverlap[thisBins] += np.bincount(thisTotOverlap)

    print(countOverlap.tolist())
    #Note this is the negative log of the insertion probability, i.e. the insertion free energy in kBT
    print('-ln(hard-sphere solute insertion probability): %f' % (-np.log(countOverlap[0] / np.sum(countOverlap))))

    #Save the distribution to file
    np.savetxt('HS-solute_overlap_hist.txt',
               np.vstack((numOverlap, countOverlap)).T,
               header='Number of non-solute atoms overlapping    Histogram count')

    solVol /= float(len(traj))
    print('Average solute hard-sphere volume (based on water oxygen LJ params): %f' % (solVol))
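#--- Illustrative aside (not part of the original script) ----------------------
#The zero-overlap fraction from the histogram above is the hard-sphere insertion
#probability, and -ln of that fraction is the corresponding insertion free energy in
#kBT. A minimal post-processing sketch, reading back the file written by this script:
import numpy as np

num, counts = np.loadtxt('HS-solute_overlap_hist.txt').T
p_insert = counts[0] / np.sum(counts)  #fraction of trial positions with zero overlaps
print("P(no overlap) = %f" % p_insert)
print("Hard-sphere insertion free energy = %f kBT" % (-np.log(p_insert)))
#--------------------------------------------------------------------------------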