Exemple #1
0
def test_load_parmed():
    """Round-trip a ParmEd structure through ``pt.load_parmed``.

    Loads the ``tz2.ortho`` topology with its coordinates via ParmEd,
    converts the result to a pytraj trajectory, and checks that the
    coordinates and both box representations agree between the two objects.
    """
    topology_path = fn("tz2.ortho.parm7")
    coordinate_path = fn("tz2.ortho.nc")

    structure = parmed.load_file(topology_path, xyz=coordinate_path)
    converted = pt.load_parmed(structure, traj=True)

    # Coordinates of every frame must survive the conversion.
    aa_eq(structure.get_coordinates(), converted.xyz)
    # Box stored on the topology ...
    aa_eq(structure.box, converted.top.box.tolist())
    # ... and the per-frame unit cells.
    aa_eq(structure.get_box(), converted.unitcells)
Exemple #2
0
def static_DAT_restraint(
    restraint_mask_list,
    num_window_list,
    ref_structure,
    force_constant,
    continuous_apr=True,
    amber_index=False,
):
    """
    Create a restraint whose value does not change during a calculation.

    The target value is measured once on the first frame of the reference
    structure (distance, angle or dihedral depending on the number of masks)
    and is then used unchanged for every requested phase.

    Parameters
    ----------
    restraint_mask_list: list
        A list of masks for which this restraint applies. Two masks define a
        distance, three an angle and four a dihedral.
    num_window_list: list
        A list of windows during which this restraint will be applied, which should be in the form: [attach windows,
        pull windows, release windows]. A phase whose entry is ``None`` or ``0`` is left unconfigured.
    ref_structure: Path-like or parmed Amber object
        The reference structure that is used to determine the initial, **static** value for this restraint.
        Either a ParmEd ``AmberParm`` instance, or a ``str`` / ``os.PathLike`` file path.
    force_constant: float
        The force constant for this restraint.
    continuous_apr: bool
        Whether this restraint uses ``continuous_apr``. This must be consistent with existing restraints.
    amber_index: bool
        Whether the atom indices for the restraint should be AMBER-style (+1) or not.

    Returns
    -------
    rest: ``DAT_restraint``
        A static restraint.

    Raises
    ------
    ValueError
        If ``num_window_list`` does not contain exactly three entries.
    TypeError
        If ``ref_structure`` is neither an ``AmberParm`` nor a path.
    IndexError
        If the number of masks is not 2, 3 or 4.

    """
    # Local import keeps this block self-contained; only needed for path handling.
    import os

    # Check num_window_list
    if len(num_window_list) != 3:
        raise ValueError(
            "The num_window_list needs to contain three integers corresponding to the number of windows in the "
            "attach, pull, and release phase, respectively ")

    rest = DAT_restraint()
    rest.continuous_apr = continuous_apr
    rest.amber_index = amber_index

    if isinstance(ref_structure, pmd.amber._amberparm.AmberParm):
        reference_trajectory = pt.load_parmed(ref_structure, traj=True)
        rest.topology = ref_structure
    elif isinstance(ref_structure, (str, os.PathLike)):
        # Normalise pathlib.Path and friends to a plain string so that both
        # loaders receive the same kind of argument they got before.
        ref_path = os.fspath(ref_structure)
        reference_trajectory = pt.iterload(ref_path, traj=True)
        rest.topology = pmd.load_file(ref_path, structure=True)
    else:
        # Format the type into the message; concatenating ``str + type``
        # would itself raise and hide the real explanation.
        raise TypeError(
            "static_DAT_restraint does not support the type associated with ref_structure: "
            f"{type(ref_structure)}")

    # Validate the mask count before indexing into the list so that too few
    # masks produce the descriptive error rather than a bare "list index out
    # of range".
    num_masks = len(restraint_mask_list)
    if num_masks not in (2, 3, 4):
        raise IndexError(
            f"The number of masks -- {num_masks} -- is not 2, 3, or 4 and thus is not one of the "
            f"supported types: distance, angle, or dihedral.")

    rest.mask1 = restraint_mask_list[0]
    rest.mask2 = restraint_mask_list[1]
    if num_masks >= 3:
        rest.mask3 = restraint_mask_list[2]
    if num_masks == 4:
        rest.mask4 = restraint_mask_list[3]

    # Target value, taken from the first frame of the reference trajectory
    mask_string = " ".join(restraint_mask_list)
    if num_masks == 2:
        # Distance restraint: apply imaging only when the topology carries a box
        use_image = bool(reference_trajectory.top.has_box())
        target = pt.distance(reference_trajectory, mask_string,
                             image=use_image)[0]
        logger.debug("Calculating distance with 'image = %s' ...", use_image)
    elif num_masks == 3:
        # Angle restraint
        target = pt.angle(reference_trajectory, mask_string)[0]
    else:
        # Dihedral restraint
        target = pt.dihedral(reference_trajectory, mask_string)[0]

    # Attach phase
    if num_window_list[0] is not None and num_window_list[0] != 0:
        rest.attach["target"] = target
        rest.attach["fc_initial"] = force_constant
        rest.attach["fc_final"] = force_constant
        rest.attach["num_windows"] = num_window_list[0]

    # Pull phase
    if num_window_list[1] is not None and num_window_list[1] != 0:
        rest.pull["fc"] = force_constant
        rest.pull["target_initial"] = target
        rest.pull["target_final"] = target
        rest.pull["num_windows"] = num_window_list[1]

    # Release phase
    if num_window_list[2] is not None and num_window_list[2] != 0:
        rest.release["target"] = target
        rest.release["fc_initial"] = force_constant
        rest.release["fc_final"] = force_constant
        rest.release["num_windows"] = num_window_list[2]

    rest.initialize()

    return rest
Exemple #3
0
    #Also, first frame in trajectory is NOT at time zero, so subtract 1
    if endTime == -1:
        thisPot = alcDat[:, 3:-1]
    else:
        thisPot = alcDat[:endTime, 3:-1]
    thisg = timeseries.statisticalInefficiencyMultiple(thisPot)
    print("Statistical inefficiency for this set of potential energies: %f" %
          thisg)

    #print(startTime)
    #print(startFrame)
    #print(thisPot.shape)

    #Next load in the trajectory and get all solute coordinates that matter
    top.rb_torsions = pmd.TrackedList([])
    top = pt.load_parmed(top, traj=False)
    if endTime == -1:
        traj = pt.iterload(trajFile, top, frame_slice=(startFrame, -1))
    else:
        traj = pt.iterload(trajFile,
                           top,
                           frame_slice=(startFrame, startFrame + endTime))
    nFrames = len(traj)
    xyBox = np.array(
        traj[0].box.values
    )[:
      2]  #A little lazy, but all boxes should be same and fixed in X and Y dimensions

    #print(nFrames)

    #To correctly map solute positions on both sides of surface, need fixed reference on each surface face
Exemple #4
0
def _xyRestraintEnergy(xy, kSpring, distX, distY):
  """Half-harmonic x-y restraint energy for displacements xy (last axis holds x, y).

  Each dimension contributes 0.5*kSpring*d**2 only when the displacement exceeds
  its threshold (d > 0); at or below the threshold the contribution is zero.
  """
  dx = xy[..., 0] - distX
  dy = xy[..., 1] - distY
  return (  0.5*kSpring*(0.5*(np.sign(dx) + 1))*(dx**2)
          + 0.5*kSpring*(0.5*(np.sign(dy) + 1))*(dy**2) )


def getConfigWeightsSurf(kB=0.008314459848, T=298.15):
  """Computes and returns the configuration weights for simulations with a solute
   at an interface.
   Mostly replicates calcdGsolv in genetic_lib, but returns config weights in both
   the fully coupled and decoupled states (also includes pV term - won't matter very
   much for free energy differences, but maybe matters for weighting configurations,
   even though also probably not too much).

   Reads, for each of four hard-coded quadrant directories, '<dir>/../sol_surf.top',
   '<dir>/prod.nc' and '<dir>/alchemical_output.txt'. From the alchemical file this
   uses column 1 of the first row as the start time, column 2 as the lambda-state
   index of each row, columns 3 to second-to-last as the per-state potential energies
   and the last column as the pV term.

   Inputs:
     kB - Boltzmann constant (default presumably in kJ/mol/K to match the spring
          constants - confirm against the units of the alchemical output)
     T  - temperature used with kB to convert the restraint energy to units of kB*T
   Outputs:
     wCoupled   - normalized configuration weights in the first (coupled) state
     wDecoupled - normalized configuration weights in the last (decoupled) state
   Also prints average energies and free energy differences along the way.
  """
  #First define directory structure, spring constants, etc.
  simDirs = ['Quad_0.25X_0.25Y', 'Quad_0.25X_0.75Y', 'Quad_0.75X_0.25Y', 'Quad_0.75X_0.75Y']
  kXY = [10.0, 10.0, 10.0, 10.0] #spring constant in kJ/mol*A^2
  refX = [7.4550, 7.4550, 22.3650, 22.3650]
  refY = [8.6083, 25.8249, 8.6083, 25.8249]
  distRefX = [7.4550, 7.4550, 7.4550, 7.4550]
  distRefY = [8.6083, 8.6083, 8.6083, 8.6083]
  numStates = 19
  nonSoluteResNames = ['OTM', 'CTM', 'STM', 'NTM', 'SOL'] #Assumes working with SAM surface...

  #And some constants
  kBT = kB*T

  #First make sure all the input arrays have the same dimensions
  numSims = len(simDirs)
  allLens = np.array([len(a) for a in [kXY, refX, refY, distRefX, distRefY]])
  if np.any(allLens != numSims):
    raise ValueError("kXY, refX, refY, distRefX, and distRefY must each have one entry per "
                     "simulation directory (%i), but have lengths %s"%(numSims, str(allLens.tolist())))

  #Want to loop over all trajectories provided, storing solute position information to calculate restraints
  xyPosList = [] #X and Y coordinates of first heavy atom for all solutes, one array per simulation
  potList = [] #Potential energies, EXCLUDING RESTRAINT, for each simulation frame and lambda state
               #Will also include pV term because may matter for configurations
  nSamps = np.zeros((numSims, numStates), dtype=int) #Have as many x-y restraints as sims and same number of lambda states for each
  xyBox = np.zeros(2)

  for i, adir in enumerate(simDirs):

    topFile = "%s/../sol_surf.top"%adir
    trajFile = "%s/prod.nc"%adir
    alchemicalFile = "%s/alchemical_output.txt"%adir

    #First load in topology and get atom indices
    top = pmd.load_file(topFile)

    #Get the first heavy atom of each solute - only this atom is used for the x-y restraint
    #Storing just one index per solute also works when solutes differ in numbers of heavy atoms
    firstHeavyInds = []
    for res in top.residues:
      if res.name in nonSoluteResNames:
        continue
      thisheavyinds = [atom.idx for atom in res.atoms if atom.name[0] != 'H']
      if len(thisheavyinds) == 0:
        raise ValueError("Solute residue %s in %s has no heavy atoms to restrain"%(res.name, topFile))
      firstHeavyInds.append(thisheavyinds[0])
    firstHeavyInds = np.array(firstHeavyInds, dtype=int)

    #Load in the potential energies, INCLUDING RESTRAINT, at all states for this simulation to figure out frames to skip
    alcDat = np.loadtxt(alchemicalFile)
    startTime = alcDat[0, 1]
    startFrame = int(startTime) - 1 #Be careful here... need write frequency in alchemical file to match exactly with positions
                                    #AND assuming that have written in 1 ps increments...
                                    #Also, first frame in trajectory is NOT at time zero, so subtract 1
    thisPot = alcDat[:, 3:-1]
    thispV = alcDat[:, -1]
    if thisPot.shape[1] != numStates:
      raise ValueError("Expected %i potential energy columns in %s but found %i"
                       %(numStates, alchemicalFile, thisPot.shape[1]))

    #Next load in the trajectory and get all solute coordinates that matter
    top.rb_torsions = pmd.TrackedList([])
    top = pt.load_parmed(top, traj=False)
    traj = pt.iterload(trajFile, top, frame_slice=(startFrame, -1))
    nFrames = len(traj)
    xyBox = np.array(traj[0].box.values)[:2] #A little lazy, but all boxes should be same and fixed in X and Y dimensions

    thisxyPos = np.zeros((nFrames, len(firstHeavyInds), 2))
    thisnSamps = np.zeros(numStates, dtype=int)

    #Reference x and y coordinates for this restraint
    thisRefXY = np.array([refX[i], refY[i]])

    for j, frame in enumerate(traj):

      thisPos = np.array(frame.xyz)
      thisXY = thisPos[firstHeavyInds][:, :2] #Takes XY coords for first heavy atom from each solute
      thisxyPos[j,:] = thisXY
      thisnSamps[int(alcDat[j, 2])] += 1 #Lambda states must be indexed starting at 0

      #Reimage around the restraint reference position and sum the x-y restraint energy
      #over all solutes so that it can be removed from the recorded potential energies
      xy = wl.reimage(thisXY, thisRefXY, xyBox) - thisRefXY
      xyEnergy = np.sum(_xyRestraintEnergy(xy, kXY[i], distRefX[i], distRefY[i]))

      #Remove the restraint energy (only for x-y restraint... z is the same in all simulations)
      thisPot[j,:] -= (xyEnergy / kBT)

      #And also add in pV contribution
      thisPot[j,:] += thispV[j]

    #Add to other things we're keeping track of
    xyPosList.append(thisxyPos)
    potList.append(thisPot)
    nSamps[i,:] = thisnSamps

  #Stack once at the end rather than re-copying the growing arrays every iteration
  xyPos = np.vstack(xyPosList)
  allPots = np.vstack(potList)
  numSolutes = xyPos.shape[1]

  #Now should have all the information we need
  #Next, put it into the format that MBAR wants, adding energies as needed
  Ukn = np.zeros((numSims*numStates, int(np.sum(nSamps))))

  for i in range(numSims):

    #First get energy of ith type of x-y restraint for all x-y positions
    thisRefXY = np.array([refX[i], refY[i]])
    #Must do by looping over each solute
    xyEnergy = np.zeros(xyPos.shape[0])
    for k in range(numSolutes):
      xy = wl.reimage(xyPos[:,k,:], thisRefXY, xyBox) - thisRefXY
      xyEnergy += _xyRestraintEnergy(xy, kXY[i], distRefX[i], distRefY[i])

    #Every alchemical state with this restraint gets the same restraint energy added
    Ukn[i*numStates:(i+1)*numStates, :] = allPots.T + (xyEnergy / kBT)

  #Now should be set to run MBAR
  mbarObj = mbar.MBAR(Ukn, nSamps.flatten())

  #Following computePMF in MBAR to get configuration weights with desired potential of interest
  logwCoupled = mbarObj._computeUnnormalizedLogWeights(allPots[:,0])
  logwDecoupled = mbarObj._computeUnnormalizedLogWeights(allPots[:,-1])

  #Also report average solute-system LJ and coulombic potential energies in the fully coupled ensemble
  #(with restraints removed)
  #Just printing these values
  avgQ, stdQ = mbarObj.computeExpectations(allPots[:,0] - allPots[:,4], allPots[:,0])
  avgLJ, stdLJ = mbarObj.computeExpectations(allPots[:,4] - allPots[:,-1], allPots[:,0])
  print("\nAverage solute-system electrostatic potential energy: %f +/- %f"%(avgQ, stdQ))
  print("Average solute-system LJ potential energy: %f +/- %f\n"%(avgLJ, stdLJ))

  #Also print information that can be used to break free energy into components
  #Start by just printing all of the free energies between states
  alldGs, alldGerr = mbarObj.computePerturbedFreeEnergies(allPots.T)
  print("\nAll free energies relative to first (coupled) state:")
  print(alldGs.tolist())
  print(alldGerr.tolist())
  #And the free energy changes associated with just turning on LJ and elctrostatics separately
  dGq = alldGs[0][0] - alldGs[0][4]
  dGqErr = np.sqrt((alldGerr[0][0]**2) + (alldGerr[0][4])**2)
  print("\nElectrostatic dG (with LJ on): %f +/- %f"%(dGq, dGqErr))
  dGlj = alldGs[4][4] - alldGs[4][-1]
  dGljErr = np.sqrt((alldGerr[4][4]**2) + (alldGerr[4][-1])**2)
  print("\nLJ dG (no charges): %f +/- %f"%(dGlj, dGljErr))
  #Now calculate average potential energy differences needed for computing relative entropies
  dUq, dUqErr = mbarObj.computeExpectations(allPots[:,0] - allPots[:,4], allPots[:,0])
  print("\nAverage electrostatic potential energy in fully coupled state: %f +/- %f"%(dUq, dUqErr))
  dUlj, dUljErr = mbarObj.computeExpectations(allPots[:,4] - allPots[:,-1], allPots[:,4])
  print("\nAverage LJ potential energy (no charges) in uncharged state: %f +/- %f"%(dUlj, dUljErr))

  #And return weights after exponentiating log weights and normalizing
  wCoupled = np.exp(logwCoupled)
  wCoupled /= np.sum(wCoupled)
  wDecoupled = np.exp(logwDecoupled)
  wDecoupled /= np.sum(wDecoupled)

  return wCoupled, wDecoupled
Exemple #5
0
def _loadProdTraj(topFile, adir, doReweight):
  """Loads the pytraj topology and production trajectory iterator for one simulation directory.

  When doReweight is True, the start time in the first row (column 1) of
  '<adir>/alchemical_output.txt' sets how many leading frames are skipped.
  Returns (top, traj).
  """
  if doReweight:
    #Before loading trajectory, figure out how many frames to exclude due to weight equilibration
    alcDat = np.loadtxt(adir+'/alchemical_output.txt')
    startTime = alcDat[0, 1]
    startFrame = int(startTime) - 1
  else:
    startFrame = 0

  top = pmd.load_file(topFile)
  top.rb_torsions = pmd.TrackedList([]) #This is just for SAM systems so that it doesn't break pytraj
  top = pt.load_parmed(top, traj=False)
  traj = pt.iterload(adir+'/prod.nc', top, frame_slice=(startFrame, -1))

  return top, traj


def main(args):
  """Computes weighted water-structure histograms in the solvation shells of a solute.

  Inputs:
    args - sequence of strings:
           args[0] topology file name
           args[1] 'True' if the solute is in bulk, anything else for a solute at a surface
           args[2] 'True' to reweight configurations with expanded ensemble weights,
                   anything else to weight all frames equally
  Outputs (all written to the working directory with np.savetxt):
    solute-OW_RDFs_coupled.txt, solute-OW_RDFs_decoupled.txt,
    solute_shell_hists.txt, solute_probe_hists.txt,
    solute_ang_hists.txt, solute_pair_hists.txt

  All print calls take a single argument inside parentheses so that the function
  parses and prints identically under Python 2 and Python 3.
  """

  print(time.ctime(time.time()))

  #Get topology file we're working with
  topFile = args[0]

  #And figure out if we're dealing with solute at surface or in bulk
  inBulk = (args[1] == 'True')

  doReweight = (args[2] == 'True')

  #Read in topology file now to get information on solute atoms
  top = pmd.load_file(topFile)
  soluteInds = []
  for res in top.residues:
    if res.name not in ['OTM', 'CTM', 'STM', 'NTM', 'SOL']:
      soluteInds.extend(atom.idx for atom in res.atoms)

  #Now define how we compute three-body angles with bins and cut-off
  #Shell cut-off
  shellCut = 3.32 #1st minimum distance for TIP4P-Ew water at 298.15 K and 1 bar
  #Number of angle bins
  nAngBins = 100 #500
  #Define bin centers (should be nBins equally spaced between 0 and 180)
  angBins = np.arange(0.0, 180.001, 180.0/nAngBins)
  angBinCents = 0.5 * (angBins[:-1] + angBins[1:])

  #And distance bins for local oxygen-oxygen RDF calculation
  #(really distance histograms from central oxygens - can normalize however we want, really)
  distBinWidth = 0.05
  nDistBins = int(shellCut / distBinWidth)
  distBins = np.arange(0.0, nDistBins*distBinWidth+0.00001, distBinWidth)
  distBinCents = 0.5 * (distBins[:-1] + distBins[1:])

  #Define the size of the probes used for assessing density fluctuations near solute
  probeRadius = 3.3 # radius in Angstroms; the DIAMETER of a methane so assumes other atoms methane-sized
  nGridPoints = 500 #Number of random probe centers generated per frame

  #And bins for numbers of waters in probes
  probeBins = np.arange(0.0, 21.00001, 1.0)
  nProbeBins = len(probeBins) - 1 #Will use np.histogram, which includes left edge in bin (so if want up to 20, go to 21)

  #And will record number waters in each solvation shell (histograms of)
  shellBins = np.arange(0.0, 251.00001, 1.0) #probably way too many bins, but don't want to run out
  nShellBins = len(shellBins) - 1

  #Should we do 2D histogram for angles and distances?
  #Or also do 2D histogram for number of waters in probe and three-body angle?
  #Interesting if make probe radius same size as three-body angle cutoff?

  #Finally, also define the bins for computing RDFs of waters near all solute atoms
  #Will use to define 1st and 2nd solvation shells
  rdfBinWidth = 0.2
  rdfMax = 12.00
  rdfBins = np.arange(0.0, rdfMax+0.000001, rdfBinWidth)
  rdfBinCents = 0.5 * (rdfBins[:-1] + rdfBins[1:])
  nRDFBins = len(rdfBinCents)
  rdfBinVols = (4.0*np.pi/3.0)*(rdfBins[1:]**3 - rdfBins[:-1]**3)
  bulkDens = 0.0332 #Roughly right for TIP4P-EW at 298.15 K and 1 bar in inverse Angstroms cubed
                    #Not applied below - RDFs are only normalized by bin volume

  #Need to create a variety of arrays to hold the data we're interested in
  #Will record distributions in both 1st and 2nd solvation shells
  shellCountsCoupled = np.zeros((nShellBins, 2)) #Histograms for numbers of waters in hydration shells of solutes
  probeHistsCoupled = np.zeros((nProbeBins, 2)) #Histograms for numbers waters in probes in 1st and 2nd hydration shells
  angHistsCoupled = np.zeros((nAngBins, 2)) #Histograms of three-body angles for water oxygens within solvation shells
  distHistsCoupled = np.zeros((nDistBins, 2)) #Histograms of distances to water oxygens from central oxygens
  solRDFsCoupled = np.zeros((nRDFBins, len(soluteInds))) #RDFs between each solute atom and water oxygens
  shellCountsDecoupled = np.zeros((nShellBins, 2)) #Same as above, but decoupled state, not coupled state
  probeHistsDecoupled = np.zeros((nProbeBins, 2))
  angHistsDecoupled = np.zeros((nAngBins, 2))
  distHistsDecoupled = np.zeros((nDistBins, 2))
  solRDFsDecoupled = np.zeros((nRDFBins, len(soluteInds)))

  #First need configuration weights to use in computing average quantities
  #But only do if we're using a simuation with an expanded ensemble
  if doReweight:
    if inBulk:
      weightsCoupled, weightsDecoupled = getConfigWeightsBulk(kB=1.0, T=1.0)
      #Using 1 for kB and T because alchemical_output.txt should already have potential energies in kBT
      simDirs = ['.']
    else:
      weightsCoupled, weightsDecoupled = getConfigWeightsSurf()
      simDirs = ['Quad_0.25X_0.25Y', 'Quad_0.25X_0.75Y', 'Quad_0.75X_0.25Y', 'Quad_0.75X_0.75Y']
  else:
    weightsCoupled = np.array([])
    weightsDecoupled = np.array([])
    simDirs = ['.']

  #To correctly match weights up to configurations, need to count frames from all trajectories
  countFrames = 0

  #Next, want to loop over all trajectories and compute RDFs from solute atoms to water oxygens
  #Will use this to define solvation shells for finding other properties
  #Actually, having looked at RDFs, just use 5.5 for first shell and 8.5 for second shell...
  #AND use all atoms, including hydrogens, which have LJ interactions in GAFF2, to define shells
  #Actually now only using heavy atoms... but when look at RDFs, examine all atoms
  for adir in simDirs:

    top, traj = _loadProdTraj(topFile, adir, doReweight)

    if not doReweight:
      #Unit weights for now - divided by the total number of frames once that is known
      weightsCoupled = np.hstack((weightsCoupled, np.ones(len(traj))))
      weightsDecoupled = np.hstack((weightsDecoupled, np.ones(len(traj))))

    print("\nTopology and trajectory loaded from directory %s" % adir)

    owInds = top.select('@OW')
    soluteInds = top.select('!(:OTM,CTM,STM,NTM,SOL)')

    print("\n\tFound %i water oxygens" % len(owInds))
    print("\tFound %i solute atoms" % len(soluteInds))

    for i, frame in enumerate(traj):

      if i%1000 == 0:
        print("On frame %i" % i)

      boxDims = np.array(frame.box.values[:3])

      currCoords = np.array(frame.xyz)

      #Wrap based on soluate atom center of geometry and get coordinates of interest
      wrapCOM = np.average(currCoords[soluteInds], axis=0)
      currCoords = wl.reimage(currCoords, wrapCOM, boxDims) - wrapCOM
      owCoords = currCoords[owInds]
      solCoords = currCoords[soluteInds]

      wCoupled = weightsCoupled[countFrames+i]
      wDecoupled = weightsDecoupled[countFrames+i]

      #Loop over solute atoms and find pair-distance histograms with water oxygens
      #The histogram does not depend on the ensemble, so compute once and weight twice
      for j, acoord in enumerate(solCoords):
        thisRDFHist = wl.pairdistancehistogram(np.array([acoord]), owCoords, rdfBinWidth, nRDFBins, boxDims)
        solRDFsCoupled[:,j] += wCoupled * thisRDFHist
        solRDFsDecoupled[:,j] += wDecoupled * thisRDFHist
        #Note that pairdistancehistogram is right-edge inclusive, NOT left-edge inclusive
        #In practice, not a big difference

    countFrames += len(traj)

  #Finish by normalizing RDFs properly (each column by the bin volumes)
  solRDFsCoupled /= rdfBinVols[:, np.newaxis] #bulkDens*rdfBinVols
  solRDFsDecoupled /= rdfBinVols[:, np.newaxis] #bulkDens*rdfBinVols
  if not doReweight:
    solRDFsCoupled /= float(countFrames)
    solRDFsDecoupled /= float(countFrames)

  #And save to file
  np.savetxt('solute-OW_RDFs_coupled.txt', np.hstack((np.array([rdfBinCents]).T, solRDFsCoupled)),
             header='RDF bins (A)    solute atom-OW RDF for solute atom indices %s'%(str(soluteInds)))
  np.savetxt('solute-OW_RDFs_decoupled.txt', np.hstack((np.array([rdfBinCents]).T, solRDFsDecoupled)),
             header='RDF bins (A)    solute atom-OW RDF for solute atom indices %s'%(str(soluteInds)))

  print("\tFound RDFs for water oxygens from solute indices.")

  solShell1Cut = 5.5 #Angstroms from all solute atoms (including hydrogens)
  solShell2Cut = 8.5
  #(low, high) distance limits of the first and second solvation shells
  shellCuts = ((0.0, solShell1Cut), (solShell1Cut, solShell2Cut))

  #And now that we know how many frames, we can assign real weights if not reweighting
  if not doReweight:
    weightsCoupled /= float(countFrames)
    weightsDecoupled /= float(countFrames)

  #Reset countFrames so get weights right
  countFrames = 0

  #Repeat looping over trajectories to calculate water properties in solute solvation shell
  for adir in simDirs:

    top, traj = _loadProdTraj(topFile, adir, doReweight)

    print("\nTopology and trajectory loaded from directory %s" % adir)

    owInds = top.select('@OW')
    soluteInds = top.select('!(:OTM,CTM,STM,NTM,SOL)&!(@H=)')
    surfInds = top.select('(:OTM,CTM,STM,NTM)&!(@H=)') #For probe insertions, also include solute and surface heavy atoms

    print("\n\tFound %i water oxygens" % len(owInds))
    print("\tFound %i solute heavy atoms" % len(soluteInds))
    print("\tFound %i non-hydrogen surface atoms" % len(surfInds))

    if len(surfInds) == 0:
      #Make sure an empty selection is still an integer array so it can be used as an index
      surfInds = surfInds.astype(int)

    for i, frame in enumerate(traj):

      #if i%10 == 0:
      #  print("On frame %i" % i)

      boxDims = np.array(frame.box.values[:3])

      currCoords = np.array(frame.xyz)

      #Wrap based on soluate atom center of geometry and get coordinates of interest
      wrapCOM = np.average(currCoords[soluteInds], axis=0)
      currCoords = wl.reimage(currCoords, wrapCOM, boxDims) - wrapCOM
      owCoords = currCoords[owInds]
      solCoords = currCoords[soluteInds]
      surfCoords = currCoords[surfInds]

      wCoupled = weightsCoupled[countFrames+i]
      wDecoupled = weightsDecoupled[countFrames+i]

      #Water properties in each solvation shell (column 0 is first shell, column 1 is second)
      for s, (shellLo, shellHi) in enumerate(shellCuts):

        #Get solvent shell around solute - a water is in the shell if within range of any solute atom
        shellBoolMat = wl.nearneighbors(solCoords, owCoords, boxDims, shellLo, shellHi)
        shellBool = np.array(np.sum(shellBoolMat, axis=0), dtype=bool)
        shellOW = owCoords[shellBool]

        #And add weight to histogram for numbers of waters in shells
        #(will raise IndexError if a shell ever holds more waters than there are shell bins)
        thisCount = int(np.sum(shellBool))
        shellCountsCoupled[thisCount, s] += wCoupled
        shellCountsDecoupled[thisCount, s] += wDecoupled

        #And compute water properties of solvent shells, first 3-body angles
        thisAngs, _ = wp.getCosAngs(shellOW, owCoords, boxDims, highCut=shellCut)
        thisAngHist, _ = np.histogram(thisAngs, bins=nAngBins, range=[0.0, 180.0], density=False)
        angHistsCoupled[:,s] += wCoupled * thisAngHist
        angHistsDecoupled[:,s] += wDecoupled * thisAngHist

        #And ow-ow pair distance histograms as well
        thisDistHist = wl.pairdistancehistogram(shellOW, owCoords, distBinWidth, nDistBins, boxDims)
        distHistsCoupled[:,s] += wCoupled * thisDistHist
        distHistsDecoupled[:,s] += wDecoupled * thisDistHist

      #Next compute distributions of numbers of waters in probes centered within each shell
      #To do this, create random grid of points in a BOX that encompasses both shells
      #Then only keep points within each shell based on distance
      #Box will be based on shell cutoffs and min and max coordinates in each dimension of solute
      #Random numbers are drawn as all X, then all Y, then all Z values
      minSol = np.min(solCoords, axis=0) - solShell2Cut
      maxSol = np.max(solCoords, axis=0) + solShell2Cut
      thisGrid = (minSol[:, np.newaxis]
                  + np.random.random((3, nGridPoints))*(maxSol - minSol)[:, np.newaxis]).T

      #Probes count waters plus surface and solute heavy atoms
      probeAtoms = np.vstack((owCoords, surfCoords, solCoords))

      for s, (shellLo, shellHi) in enumerate(shellCuts):
        gridBoolMat = wl.nearneighbors(solCoords, thisGrid, boxDims, shellLo, shellHi)
        gridBool = np.array(np.sum(gridBoolMat, axis=0), dtype=bool)
        thisNum = wl.probegrid(probeAtoms, thisGrid[gridBool], probeRadius, boxDims)
        thisProbeHist, _ = np.histogram(thisNum, bins=probeBins, density=False)
        probeHistsCoupled[:,s] += wCoupled * thisProbeHist
        probeHistsDecoupled[:,s] += wDecoupled * thisProbeHist

    countFrames += len(traj)

  #Should have everything we need, so save to text files
  np.savetxt('solute_shell_hists.txt',
             np.hstack((np.array([shellBins[:-1]]).T, shellCountsCoupled, shellCountsDecoupled)),
             header='Histograms of numbers of waters in first and second solute solvation shells with solvent in coupled (columns 2, 3) and decoupled (columns 4, 5) states')
  np.savetxt('solute_probe_hists.txt',
             np.hstack((np.array([probeBins[:-1]]).T, probeHistsCoupled, probeHistsDecoupled)),
             header='Number waters in probe histograms in first and second solute solvation shells with solvent in coupled (columns 2, 3) and decoupled (columns 4, 5) states')
  np.savetxt('solute_ang_hists.txt',
             np.hstack((np.array([angBinCents]).T, angHistsCoupled, angHistsDecoupled)),
             header='3-body angle histograms in first and second solute solvation shells with solvent in coupled (columns 2, 3) and decoupled (columns 4, 5) states')
  np.savetxt('solute_pair_hists.txt',
             np.hstack((np.array([distBinCents]).T, distHistsCoupled, distHistsDecoupled)),
             header='O-O pair-distance histograms in first and second solute solvation shells with solvent in coupled (columns 2, 3) and decoupled (columns 4, 5) states')

  print(time.ctime(time.time()))
def _load_topology_and_trajectory(topFile, simDir):
    """Load topFile with ParmEd, convert it for pytraj, and open simDir + '/prod.nc'.

    Returns the pytraj topology and the trajectory object created by pt.iterload.
    """
    top = pmd.load_file(topFile)
    #This is just for SAM systems so that it doesn't break pytraj
    top.rb_torsions = pmd.TrackedList([])
    top = pt.load_parmed(top, traj=False)
    traj = pt.iterload(simDir + '/prod.nc', top)
    return top, traj


def _symmetric_z_edges(boxZ, spacing):
    """Return bin edges in z that are symmetric about zero and span a whole number of spacings."""
    remainder = boxZ % spacing
    return np.arange((-boxZ + remainder) / 2.0,
                     (boxZ - remainder) / 2.0 + 0.001, spacing)


def _center_on_surface(coords, suInds, boxDims):
    """Re-image coords (with wl.reimage) in two passes and shift them relative to the SU atoms.

    The first pass re-images around a single SU atom to put the surface together; the second
    re-images around the average SU position so that the water is wrapped around the surface.
    """
    wrapCOM1 = coords[suInds[0]]
    coords = wl.reimage(coords, wrapCOM1, boxDims) - wrapCOM1
    wrapCOM2 = np.average(coords[suInds], axis=0)
    return wl.reimage(coords, wrapCOM2, boxDims) - wrapCOM2


def _trim_interface_vertices(verts, xGrid, yGrid, zGrid, xGridLocs, yGridLocs,
                             surfMidZ):
    """Reduce isosurface vertices to one lower-interface point per x-y bin.

    For each x-y bin the vertex with the largest z that is still below surfMidZ (and above the
    bottom of the z grid) is kept, so that odd blips in the bulk that satisfy the isosurface
    definition are hopefully excluded. Bins without any vertex get the bin-center x and y and
    borrow the z value of the first filled periodic neighbor (-x, +x, -y, +y, in that order).
    Bins that still cannot be filled keep the sentinel z value of -10000.0.

    Returns an array of shape (len(xGridLocs), len(yGridLocs), 3).
    """
    xSize = len(xGridLocs)
    ySize = len(yGridLocs)
    newvertmat = np.ones((xSize, ySize, 3))
    newvertmat[:, :, 2] = -10000.0

    xInds = np.digitize(verts[:, 0], xGrid) - 1
    yInds = np.digitize(verts[:, 1], yGrid) - 1
    for avert, thisXind, thisYind in zip(verts, xInds, yInds):
        if (avert[2] < surfMidZ
                and avert[2] > newvertmat[thisXind, thisYind, 2]
                and avert[2] > zGrid[0]):
            newvertmat[thisXind, thisYind, :] = avert

    #Need to make sure that all x-y bins had a vertex in them...
    #If not, use the z-value of one of the 4 adjacent points that isn't also unfilled
    unfilledX, unfilledY = np.where(abs(newvertmat[:, :, 2]) == 10000.0)
    for ind1, ind2 in zip(unfilledX, unfilledY):
        newvertmat[ind1, ind2, 0] = xGridLocs[ind1]
        newvertmat[ind1, ind2, 1] = yGridLocs[ind2]
        #Use modulo operator to do wrapping
        neighbors = (((ind1 - 1) % xSize, ind2), ((ind1 + 1) % xSize, ind2),
                     (ind1, (ind2 - 1) % ySize), (ind1, (ind2 + 1) % ySize))
        for nx, ny in neighbors:
            if abs(newvertmat[nx, ny, 2]) < 1000.0:
                newvertmat[ind1, ind2, 2] = newvertmat[nx, ny, 2]
                break

    return newvertmat


def _randomized_probe_points(points, xyWidth, zWidth, boxDims):
    """Displace each point by a uniform random offset in [-width, width) and re-image into the box.

    Note different spacing for x-y bins and z slices, so z is randomized separately.
    """
    nPoints = len(points)
    randomPoints = np.zeros(points.shape)
    randomPoints[:, :2] = points[:, :2] + (np.random.random_sample(
        (nPoints, 2)) - 0.5) * 2.0 * xyWidth
    randomPoints[:, 2] = points[:, 2] + (
        np.random.random_sample(nPoints) - 0.5) * 2.0 * zWidth
    return wl.reimage(randomPoints, np.zeros(3), boxDims)


def _accumulate_slice_structure(centralCoords, allCoords, boxDims, shellCut,
                                nAngBins, distBinWidth, nDistBins, angHists,
                                distHists, j):
    """Add three-body angle and pair-distance histograms for one z-slice to column j (in place).

    Does nothing when the slice holds no central oxygens.
    """
    if len(centralCoords) == 0:
        return
    thisAngs, thisNumAngs = wp.getCosAngs(centralCoords,
                                          allCoords,
                                          boxDims,
                                          highCut=shellCut)
    thisAngHist, _ = np.histogram(thisAngs,
                                  bins=nAngBins,
                                  range=[0.0, 180.0],
                                  density=False)
    angHists[:, j] += thisAngHist
    #Distance histograms with these slice oxygens as central oxygens
    distHists[:, j] += wl.pairdistancehistogram(centralCoords, allCoords,
                                                distBinWidth, nDistBins,
                                                boxDims)


def _accumulate_probe_hist(probePoints, atomCoords, probeRadius, boxDims,
                           probeBins, probeHists, j):
    """Histogram the wl.probegrid counts for probePoints and add them to column j (in place)."""
    thisNum = wl.probegrid(atomCoords, probePoints, probeRadius, boxDims)
    thisProbeHist, _ = np.histogram(thisNum, bins=probeBins, density=False)
    probeHists[:, j] += thisProbeHist


def _write_interface_netcdf(xGridLocs, yGridLocs, interfaceZ, interfaceZmean,
                            interfaceZSq):
    """Write the 2D interface-height arrays and their x/y grid locations to 'interface_Z.nc'."""
    outdata = Dataset("interface_Z.nc", "w", format="NETCDF4", zlib=True)
    try:
        outdata.createDimension("XGridPoints", len(xGridLocs))
        outdata.createDimension("YGridPoints", len(yGridLocs))
        outXLocs = outdata.createVariable("XGridPoints", "f8",
                                          ("XGridPoints", ))
        outXLocs.units = "Angstroms"
        outXLocs[:] = xGridLocs  #Coordinate variables need values, not just units
        outYLocs = outdata.createVariable("YGridPoints", "f8",
                                          ("YGridPoints", ))
        outYLocs.units = "Angstroms"
        outYLocs[:] = yGridLocs
        heightVars = (
            ("InterfaceHeightInstant",
             "Angstroms relative to SU surface atoms", interfaceZ),
            ("InterfaceHeightMean", "Angstroms relative to SU surface atoms",
             interfaceZmean),
            ("InterfaceHeightInstantSq",
             "Squared Angstroms relative to SU surface atoms", interfaceZSq),
        )
        for varName, units, values in heightVars:
            outVar = outdata.createVariable(varName, "f8",
                                            ("XGridPoints", "YGridPoints"))
            outVar.units = units
            outVar[:, :] = values
    finally:
        #Make sure the file handle is released even if a write fails
        outdata.close()


def _save_hist_table(fileName, binValues, hists, header):
    """Save histograms to a text file with the bin values as the first column."""
    np.savetxt(fileName,
               np.hstack((np.array([binValues]).T, hists)),
               header=header)


def main(args):
    """Characterize water structure near a surface using mean and instantaneous interfaces.

    For every simulation directory the trajectory 'prod.nc' is read twice: once to locate the
    mean (flat) interface from the water-oxygen density profile, and once to build the
    instantaneous interface for each frame (density field from wl.willarddensityfield plus
    marching cubes) and to accumulate, in z-slices measured from both interfaces, histograms of
    probe occupancies, three-body angles and O-O pair distances.

    Parameters
    ----------
    args: list of str
        args[0] is the topology file and args[1:] are simulation directories, each holding a
        'prod.nc' trajectory. All simulations are assumed to be set up identically
        (i.e. same box size, same number of frames).

    Outputs (written to the current directory)
    ------------------------------------------
    interface_Z.nc, z-densities.txt, probe_hists_{mean,instant}.txt,
    ang3b_hists_{mean,instant}.txt, pair_hists_{mean,instant}.txt

    Exits with status 2 if too few arguments are given or if some x-y bin has no interface point.
    """
    print(time.ctime(time.time()))

    if len(args) < 2:
        print(
            "Specify a topology file followed by at least one simulation directory."
        )
        sys.exit(2)

    #To define some things, read in the first simulation
    #Will then assume all other simulations are set up in an identical way
    topFile = args[0]
    simDirs = args[1:]

    top, traj = _load_topology_and_trajectory(topFile, simDirs[0])
    boxDims = np.array(traj[0].box.values[:3])

    #Before starting, create bins in the x, y, and z-directions
    #This will be used to define instantaneous interfaces and record some spatially varying interfacial properties
    gridSize = 1.0  #Angstroms
    xGrid = np.arange(-boxDims[0] / 2.0, boxDims[0] / 2.0 + gridSize,
                      gridSize)  #Bins may overlap, but that's ok
    yGrid = np.arange(-boxDims[1] / 2.0, boxDims[1] / 2.0 + gridSize, gridSize)
    zGrid = _symmetric_z_edges(boxDims[2],
                               gridSize)  #Symmetrizing grid in z direction

    print("Using following coarse grids in x-, y-, and z-dimensions:")
    print(xGrid)
    print(yGrid)
    print(zGrid)

    #Define grid-point centers
    xGridLocs = 0.5 * (xGrid[:-1] + xGrid[1:])
    yGridLocs = 0.5 * (yGrid[:-1] + yGrid[1:])
    zGridLocs = 0.5 * (zGrid[:-1] + zGrid[1:])

    #And define grid sizes
    xSize = len(xGridLocs)
    ySize = len(yGridLocs)

    #Now set-up fine grid in z, used for all density profiles and for locating the mean interface
    zGridSize = 0.1
    zGridFine = _symmetric_z_edges(boxDims[2], zGridSize)
    zGridLocsFine = 0.5 * (zGridFine[:-1] + zGridFine[1:])
    zSizeFine = len(zGridLocsFine)

    #For calculating some properties as a function as distance from the surface, will use specific z slices
    #While all the grid stuff is relative to surface SU atoms, want z slices relative to the interface itself
    sliceGridSize = 0.5
    zSlice = np.arange(-6.0, 12.000001, sliceGridSize)
    zSliceLocs = 0.5 * (zSlice[:-1] + zSlice[1:])
    zSliceSize = len(zSliceLocs)

    #Set the default bulk density
    bulkDens = 0.0332  #Roughly right for TIP4P-EW at 298.15 K and 1 bar in inverse Angstroms cubed

    #Define the density fraction for determining the interface location
    #This is lower than 0.5 which is usually used by Willard and others; however, it puts the
    #interface a little closer to the surface atoms and, as Willard says in his 2014 paper,
    #any value between 0.3 and 0.7 works
    fracDens = 0.3
    densCut = fracDens * bulkDens
    print(
        "\nUsing bulk density value of %f (TIP4P-Ew at 1 bar and 298.15 K)." %
        bulkDens)
    print("To define interface, using a bulk density fraction of %f." %
          fracDens)

    #Define the size of the probes used for assessing hydrophobicity
    probeRadius = 3.3  # radius in Angstroms; the DIAMETER of a methane (so assumes all other atoms methane-size)

    #Now define how we compute three-body angles with bins and cut-off
    shellCut = 3.32  #1st minimum distance for TIP4P-Ew water at 298.15 K and 1 bar
    nAngBins = 100
    #Define bin centers (should be nBins equally spaced between 0 and 180)
    angBinEdges = np.arange(0.0, 180.001, 180.0 / nAngBins)
    angBinCents = 0.5 * (angBinEdges[:-1] + angBinEdges[1:])

    #And distance bins for local RDF calculation
    #(really distance histograms from central oxygens - can normalize however we want, really)
    distBinWidth = 0.05
    nDistBins = int(shellCut / distBinWidth)
    distBins = np.arange(0.0, nDistBins * distBinWidth + 0.00001, distBinWidth)
    distBinCents = 0.5 * (distBins[:-1] + distBins[1:])

    #And bins for numbers of waters in probes
    #Will use np.histogram, which includes left edge in bin (so if want up to 20, go to 21)
    probeBins = np.arange(0.0, 21.00001, 1.0)
    nProbeBins = len(probeBins) - 1

    #Need to create a variety of arrays to hold the data we're interested in
    interfaceZmean = np.zeros(
        (xSize, ySize))  #Mean interface height - same in all x-y bins
    interfaceZ = np.zeros(
        (xSize, ySize))  #Average instantaneous interface height per x-y bin
    interfaceZSq = np.zeros(
        (xSize, ySize))  #Average squared height (for height fluctuations)
    watDensFine = np.zeros(
        zSizeFine)  #Water density relative to instantaneous interface
    watDensFineMean = np.zeros(zSizeFine)  #Water density along box z
    surfDensFine = np.zeros(zSizeFine)  #Surface heavy-atom density along box z
    #Histograms per z-slice for instantaneous ("") and mean ("Mean") interface definitions
    probeHists = np.zeros((nProbeBins, zSliceSize))
    angHists = np.zeros((nAngBins, zSliceSize))
    distHists = np.zeros((nDistBins, zSliceSize))
    probeHistsMean = np.zeros((nProbeBins, zSliceSize))
    angHistsMean = np.zeros((nAngBins, zSliceSize))
    distHistsMean = np.zeros((nDistBins, zSliceSize))

    #Since will use multiple trajectories, also need to count total frames we're averaging over
    totFrames = 0.0

    #Surface points (as INDICES into the x, y and fine z grids) and normals for the mean interface
    surfacePointsMean = np.zeros((xSize * ySize, 3), dtype=int)
    surfaceNormsMean = np.zeros((xSize * ySize, 3))

    #Newer scikit-image releases only provide marching_cubes (Lewiner is its default method)
    marchingCubes = getattr(skmeasure, 'marching_cubes_lewiner', None)
    if marchingCubes is None:
        marchingCubes = skmeasure.marching_cubes

    #Grid points this close to the box edge are skipped when searching for the mean interface
    edgeSkip = 5

    #At this point, want to actually loop over the simulation directories, load simulations,
    #and do calculations for each frame
    for adir in simDirs:

        top, traj = _load_topology_and_trajectory(topFile, adir)

        print("\nTopology and trajectory loaded from directory %s" % adir)

        owInds = top.select('@OW')
        surfInds = top.select('(:OTM,CTM,STM,NTM)&!(@H=)')
        suInds = top.select('@SU')
        print("\n\tFound %i water oxygens" % len(owInds))
        print("\tFound %i non-hydrogen surface atoms" % len(surfInds))
        print("\tFound %i SU surface atoms" % len(suInds))

        nFrames = float(
            traj.n_frames)  #Make it a float to make averaging easier later
        totFrames += nFrames

        tempWatDensFineMean = np.zeros(zSizeFine)

        #Find the mean interface definition to use for this trajectory
        #Need to loop over the trajectory once and find mean interface location
        for frame in traj:

            boxDims = np.array(frame.box.values[:3])
            currCoords = _center_on_surface(np.array(frame.xyz), suInds,
                                            boxDims)

            #Bin the current z coordinates
            thisSurfDens, _ = np.histogram(currCoords[surfInds, 2],
                                           bins=zGridFine,
                                           density=False)
            thisWatDens, _ = np.histogram(currCoords[owInds, 2],
                                          bins=zGridFine,
                                          density=False)
            surfDensFine += thisSurfDens
            tempWatDensFineMean += thisWatDens

        #Record fine density, then normalize to number densities for next calculation
        #(uses the box dimensions of the last frame read)
        watDensFineMean += tempWatDensFineMean
        tempWatDensFineMean = tempWatDensFineMean / (nFrames * boxDims[0] *
                                                     boxDims[1] * zGridSize)

        #Find the average density in a region far from the surface to use as a reference
        #Uses 1.0 A slices at edges of wrapped box
        #It's a useful quantity to report, but better to just use fixed cut-off for all simulations...
        nEdgeBins = int(1.0 / zGridSize)
        refOWdens = np.average(0.5 *
                               (tempWatDensFineMean[-nEdgeBins - 1:-1] +
                                tempWatDensFineMean[1:nEdgeBins + 1]))
        print(
            "\n\tDensity value in center of water phase (far from interface): %f"
            % refOWdens)

        #And find where the water density is closest to densCut in the lower half of the box
        #Exclude grid points near box edge because fluctuations in box lead to weird densities there
        #argmin is relative to the slice, so add the offset back to index the full fine grid
        loInd = edgeSkip + np.argmin(
            abs(tempWatDensFineMean[edgeSkip:(zSizeFine // 2)] - densCut))

        print(
            "\tOn lower surface, mean interface is at following Z-coordinate:")
        print("\t  Lower: %f" % zGridLocsFine[loInd])

        #Now set up arrays of surface points and surface normal vectors - easy for mean interface
        thisxmesh, thisymesh = np.meshgrid(np.arange(xSize, dtype=int),
                                           np.arange(ySize, dtype=int))
        surfacePointsMean[:, 0:2] = np.vstack(
            (thisxmesh.flatten(), thisymesh.flatten())).T
        surfacePointsMean[:, 2] = loInd
        surfaceNormsMean[:,
                         2] = -1.0  #Just working with LOWER surface (solute is on top)

        #Just go ahead and record the average interface height at each x and y bin, since it won't change
        interfaceZmean[:, :] += zGridLocsFine[loInd]

        #Now should be ready to loop over the trajectory using both interface definitions and computing things
        print(
            "\nPre-processing finished, starting main loop over trajectory.")

        for frame in traj:

            boxDims = np.array(frame.box.values[:3])

            #Need to do a wrapping procedure to more easily find waters within certain layers of surface
            currCoords = _center_on_surface(np.array(frame.xyz), suInds,
                                            boxDims)

            OWCoords = currCoords[owInds]
            suCoords = currCoords[suInds]
            surfCoords = currCoords[surfInds]
            surfMidZ = np.average(suCoords[:, 2])

            #Get actual locations of mean-interface points (not just indices), using the fine z-grid
            thisSurfMean = np.vstack((xGridLocs[surfacePointsMean[:, 0]],
                                      yGridLocs[surfacePointsMean[:, 1]],
                                      zGridLocsFine[surfacePointsMean[:,
                                                                      2]])).T

            #Use water coordinates to find instantaneous interface
            #First need to find the density field
            #Note that we use 2.4 as the smoothing length, as in Willard and Chandler, 2010
            thisdensfield, thisdensnorms = wl.willarddensityfield(
                OWCoords, xGridLocs, yGridLocs, zGridLocs, boxDims, 2.4)

            #Next need to define the interface - using the marching cubes algorithm in scikit-image
            verts, faces, normals, values = marchingCubes(
                thisdensfield, densCut, spacing=(gridSize, gridSize, gridSize))

            #Shift the points returned...
            verts[:, 0] += xGrid[0]
            verts[:, 1] += yGrid[0]
            verts[:, 2] += zGrid[0]

            #For the purposes of making sure we have a single interface point for each x-y grid cell,
            #we DO NOT want to wrap yet. This is because the grid may extend past the box size. For accurately
            #calculating distances this is an issue because points outside the box will never overlap with
            #wrapped atoms. But, we can't wrap now... have to wait until after we have our interface points.
            newvertmat = _trim_interface_vertices(verts, xGrid, yGrid, zGrid,
                                                  xGridLocs, yGridLocs,
                                                  surfMidZ)

            #If the above procedure left empty bins, just quit and recommend a finer z-grid size
            if np.any(abs(newvertmat[:, :, 2]) == 10000.0):
                print(
                    "Error: after trimming surface points, unable to find surface point for each x-y bin."
                )
                print(
                    "Could fix this by not using all bins and keeping track of bin counts, but maybe later."
                )
                print("Try and use a finer bin size in the z-dimension.")
                sys.exit(2)

            #While the points are in a convenient format, record the interface height at each x-y bin
            interfaceZ += newvertmat[:, :, 2]
            interfaceZSq += newvertmat[:, :, 2]**2

            #Now put together list of exact surface locations and unit normals at those locations
            thisSurf = np.reshape(newvertmat, (xSize * ySize, 3)).copy()
            unusedDensVals, surfaceNorms = wl.willarddensitypoints(
                OWCoords, thisSurf, boxDims, 2.4)

            #At this point, MUST wrap our interface points into box so that distances from waters to them are accurate
            #We couldn't do this earlier because we need to assign a single interface point to each x-y grid cell
            thisSurf = wl.reimage(thisSurf, np.zeros(3), boxDims)
            thisSurfMean = wl.reimage(thisSurfMean, np.zeros(3), boxDims)

            #Want to find the density profile
            #To do this, first need to find which surface point closest to each water
            #Then project that water's distance from the point along the surface normal
            #Note that surfaceNorms should be normalized to length 1!
            #Also, don't worry about the cutoff of 3.0 supplied... this is just part of the routine
            #Really only want thisWatDists from this function - other stuff is not as useful for this code
            thisWatClose, thisSurfClose, thisSliceNum, thisWatDists = wl.interfacewater(
                OWCoords, thisSurf, surfaceNorms, 3.0, boxDims)

            #Now add to the instantaneous fine water density profile - mean is already done
            thisWatHist, _ = np.histogram(thisWatDists,
                                          bins=zGridFine,
                                          density=False)
            watDensFine += thisWatHist

            #Now want to look at properties within z-slices moving normal to both interface definitions
            #For three-body angles and pair distances, digitize waters for each interface definition
            thisSliceInds = np.digitize(thisWatDists, zSlice) - 1
            thisSliceIndsMean = np.digitize(
                -1.0 *
                (OWCoords[:, 2] - zGridLocsFine[surfacePointsMean[0, 2]]),
                zSlice) - 1

            #For probe insertions, placing probes at random x, y, and z locations within slices
            #Points are wrapped after randomization to make sure not outside the simulation box
            #(instantaneous points are drawn before mean points to keep the random sequence fixed)
            randomGrid = _randomized_probe_points(thisSurf, gridSize,
                                                  sliceGridSize, boxDims)
            randomGridMean = _randomized_probe_points(thisSurfMean, gridSize,
                                                      sliceGridSize, boxDims)

            #Probes count both water oxygens and surface heavy atoms; same for every slice
            probeAtoms = np.vstack((OWCoords, surfCoords))

            #And loop over z indices, selecting waters and calculating what we want in those slices
            for j in range(zSliceSize):

                thisSliceCoords = OWCoords[np.where(thisSliceInds == j)[0]]
                thisSliceCoordsMean = OWCoords[np.where(
                    thisSliceIndsMean == j)[0]]

                #Instantaneous interface: structure around slice waters, then probe occupancies
                _accumulate_slice_structure(thisSliceCoords, OWCoords, boxDims,
                                            shellCut, nAngBins, distBinWidth,
                                            nDistBins, angHists, distHists, j)
                _accumulate_probe_hist(
                    randomGrid + surfaceNorms * zSliceLocs[j], probeAtoms,
                    probeRadius, boxDims, probeBins, probeHists, j)

                #Same quantities for the mean interface
                _accumulate_slice_structure(thisSliceCoordsMean, OWCoords,
                                            boxDims, shellCut, nAngBins,
                                            distBinWidth, nDistBins,
                                            angHistsMean, distHistsMean, j)
                _accumulate_probe_hist(
                    randomGridMean + surfaceNormsMean * zSliceLocs[j],
                    probeAtoms, probeRadius, boxDims, probeBins,
                    probeHistsMean, j)

    #Done looping over trajectories, etc.
    #Now finish computing some quantities by averaging appropriately
    #(densities use the box dimensions of the last frame read)
    interfaceZmean /= float(len(simDirs))
    interfaceZ /= totFrames
    interfaceZSq /= totFrames
    sliceVolumeNorm = totFrames * boxDims[0] * boxDims[1] * zGridSize
    watDensFine /= sliceVolumeNorm
    watDensFineMean /= sliceVolumeNorm
    surfDensFine /= sliceVolumeNorm
    #Just let the histograms be histograms... can normalize later if need to (don't think numbers will get too big)

    #Now save everything to files
    #For 2D stuff (i.e. interfaces), use netCDF4 format
    _write_interface_netcdf(xGridLocs, yGridLocs, interfaceZ, interfaceZmean,
                            interfaceZSq)

    #For everything else, just use text files, even though some may be a little big (i.e. angles and pair distances)
    np.savetxt(
        'z-densities.txt',
        np.vstack(
            (zGridLocsFine, surfDensFine, watDensFineMean, watDensFine)).T,
        header=
        'Distance normal to interface (A)     Surface heavy atom density (1/A^3)      Water oxygen density mean interface (at %f)       Water oxygen density instantaneous interface'
        % interfaceZmean[0, 0])
    _save_hist_table(
        'probe_hists_mean.txt', probeBins[:-1], probeHistsMean,
        'Number waters in probe histograms at following distances from mean interface: %s'
        % str(zSliceLocs))
    _save_hist_table(
        'probe_hists_instant.txt', probeBins[:-1], probeHists,
        'Number waters in probe histograms at following distances from instantaneous interface: %s'
        % str(zSliceLocs))
    _save_hist_table(
        'ang3b_hists_mean.txt', angBinCents, angHistsMean,
        '3-body angle histograms at following distances from mean interface: %s'
        % str(zSliceLocs))
    _save_hist_table(
        'ang3b_hists_instant.txt', angBinCents, angHists,
        '3-body angle histograms at following distances from instantaneous interface: %s'
        % str(zSliceLocs))
    _save_hist_table(
        'pair_hists_mean.txt', distBinCents, distHistsMean,
        'O-O pair-distance histograms at following distances from mean interface: %s'
        % str(zSliceLocs))
    _save_hist_table(
        'pair_hists_instant.txt', distBinCents, distHists,
        'O-O pair-distance histograms at following distances from instantaneous interface: %s'
        % str(zSliceLocs))

    print(time.ctime(time.time()))
def main(args):
    """Re-evaluate potential energies of trajectory frames in four alchemical states.

    Builds an OpenMM system from the topology/structure files, replaces the
    solute's non-bonded interactions with custom soft-core/WCA, intramolecular
    Coulomb and intramolecular LJ forces, and then loops over every frame of the
    trajectory file(s) to compute the potential energy (in units of kBT) for:
    fully coupled, charges off, charges off with WCA only, and decoupled.
    Results are written to 'pot_energy_decomp.txt' and passed to printInfo.

    Parameters
    ----------
    args : sequence of str
        args[0] - topology file
        args[1] - structure file (also supplies the unit cell)
        args[2] - trajectory file name (also searched for as 'Quad*/<name>')
        args[3] - optional start frame (defaults to 0)
        args[4] - optional flag; 'true' or 'yes' (any case) adds the flat-bottom
                  z-restraint between the surface SU atoms and the solute

    Notes
    -----
    Exits with status 2 if the required files are missing from args or the
    start frame is not an integer. Raises RuntimeError if the system has no
    NonbondedForce and IOError if no 'alchemical_U*.txt' file is found.
    """
    #Get the structure, topology, and trajectory files from the command line
    #ParmEd accepts a wide range of file types (Amber, GROMACS, CHARMM, OpenMM... but not LAMMPS)
    try:
        topFile = args[0]
        strucFile = args[1]
        trajFile = args[2]
    except IndexError:
        print(
            "Specify topology, structure, and trajectory files from the command line."
        )
        print(Usage)
        sys.exit(2)

    #And also allow user to specify start frame, but default to zero if no input
    try:
        startFrame = int(args[3])
    except IndexError:
        startFrame = 0
    except ValueError:
        #A malformed start frame is a usage error, not something to guess at
        print("Start frame must be an integer, but got: %s" % args[3])
        print(Usage)
        sys.exit(2)

    #And get information on whether or not to use a restraint
    try:
        restraintBool = args[4].lower() in ('true', 'yes')
    except IndexError:
        restraintBool = False

    print("Using topology file: %s" % topFile)
    print("Using structure file: %s" % strucFile)
    print("Using trajectory file: %s" % trajFile)

    print("\nSetting up system...")

    #Load in the files for initial simulations
    top = pmd.load_file(topFile)
    struc = pmd.load_file(strucFile)

    #Transfer unit cell information to topology object
    top.box = struc.box[:]

    #Set up some global features to use in all simulations
    temperature = 298.15 * u.kelvin

    #Define the platform (i.e. hardware and drivers) to use for running the simulation
    #This can be CUDA, OpenCL, CPU, or Reference
    #CUDA is for NVIDIA GPUs
    #OpenCL is for CPUs or GPUs, but must be used for old CPUs (not SSE4.1 compatible)
    #CPU only allows single precision (CUDA and OpenCL allow single, mixed, or double)
    #Reference is a clear, stable reference for other code development and is very slow, using double precision by default
    platform = mm.Platform.getPlatformByName('CUDA')
    prop = {  #'Threads': '1', #number of threads for CPU - all definitions must be strings (I think)
        'Precision':
        'mixed',  #for CUDA or OpenCL, select the precision (single, mixed, or double)
        'DeviceIndex':
        '0',  #selects which GPUs to use - set this to zero if using CUDA_VISIBLE_DEVICES
        'DeterministicForces':
        'True'  #Makes sure forces with CUDA and PME are deterministic
    }

    #Create the OpenMM system that can be used as a reference
    systemRef = top.createSystem(
        nonbondedMethod=app.
        PME,  #Uses PME for long-range electrostatics, simple cut-off for LJ
        nonbondedCutoff=12.0 *
        u.angstroms,  #Defines cut-off for non-bonded interactions
        rigidWater=True,  #Use rigid water molecules
        constraints=app.HBonds,  #Constrains all bonds involving hydrogens
        flexibleConstraints=
        False,  #Whether to include energies for constrained DOFs
        removeCMMotion=
        True,  #Whether or not to remove COM motion (don't want to if part of system frozen)
    )

    #Get solute atoms and solute heavy atoms separately
    #Every residue NOT named below is treated as part of the solute
    soluteIndices = []
    heavyIndices = []
    for res in top.residues:
        if res.name not in ['OTM', 'CTM', 'STM', 'NTM', 'SOL']:
            for atom in res.atoms:
                soluteIndices.append(atom.idx)
                if 'H' not in atom.name[0]:
                    heavyIndices.append(atom.idx)

    #If working with expanded ensemble simulation of solute near the interface, need to include restraint to keep close
    if restraintBool:
        #Also get surface SU atoms
        surfIndices = []
        for atom in top.atoms:
            if atom.type == 'SU':
                surfIndices.append(atom.idx)

        print("\nSolute indices: %s" % str(soluteIndices))
        print("Solute heavy atom indices: %s" % str(heavyIndices))
        print("Surface SU atom indices: %s" % str(surfIndices))

        #Will now add a custom bonded force between heavy atoms of each solute and surface SU atoms
        #Should be in units of kJ/mol*nm^2, but should check this
        #Also, note that here we are using a flat-bottom restraint to keep close to surface
        #AND to keep from penetrating into surface when it's in the decoupled state
        refZlo = 1.4 * u.nanometer  #in nm, the distance between the SU atoms and the solute centroid
        refZhi = 1.7 * u.nanometer
        restraintExpression = '0.5*k*step(refZlo - (z2 - z1))*(((z2 - z1) - refZlo)^2)'
        restraintExpression += '+ 0.5*k*step((z2 - z1) - refZhi)*(((z2 - z1) - refZhi)^2)'
        restraintForce = mm.CustomCentroidBondForce(2, restraintExpression)
        restraintForce.addPerBondParameter('k')
        restraintForce.addPerBondParameter('refZlo')
        restraintForce.addPerBondParameter('refZhi')
        #Group 0 is the surface, group 1 is the solute heavy atoms (order matters for addBond below)
        restraintForce.addGroup(surfIndices, np.ones(
            len(surfIndices)))  #Don't weight with masses
        #To assign flat-bottom restraint correctly, need to know if each solute is above or below interface
        #Will need surface z-positions for this
        suZpos = np.average(struc.coordinates[surfIndices, 2])
        restraintForce.addGroup(heavyIndices, np.ones(len(heavyIndices)))
        solZpos = np.average(struc.coordinates[heavyIndices, 2])
        if (solZpos - suZpos) > 0:
            restraintForce.addBond([0, 1], [10000.0, refZlo, refZhi])
        else:
            #A little confusing, but have to negate and switch for when z2-z1 is always going to be negative
            restraintForce.addBond([0, 1], [10000.0, -refZhi, -refZlo])
        systemRef.addForce(restraintForce)

    #We need to add a custom non-bonded force for the solute being alchemically changed
    #Will be helpful to have handle on non-bonded force handling LJ and coulombic interactions
    NBForce = None
    for frc in systemRef.getForces():
        if (isinstance(frc, mm.NonbondedForce)):
            NBForce = frc
    if NBForce is None:
        #Everything below manipulates this force, so fail with a clear message
        raise RuntimeError(
            "No NonbondedForce found in the system created from %s" % topFile)

    #Turn off dispersion correction since have interface
    NBForce.setUseDispersionCorrection(False)

    #Separate out alchemical and regular particles using set objects
    alchemicalParticles = set(soluteIndices)
    chemicalParticles = set(range(
        systemRef.getNumParticles())) - alchemicalParticles

    #Define the soft-core function for turning on/off LJ interactions
    #In energy expressions for CustomNonbondedForce, r is a special variable and refers to the distance between particles
    #All other variables must be defined somewhere in the function.
    #The exception are variables like sigma1 and sigma2.
    #It is understood that a parameter will be added called 'sigma' and that the '1' and '2' are to specify the combining rule.
    #Have also added parameter to switch the soft-core interaction to a WCA potential
    softCoreFunctionWCA = '(step(x-0.5))*(4.0*lambdaLJ*epsilon*x*(x-1.0) + (1.0-lambdaWCA)*lambdaLJ*epsilon) '
    softCoreFunctionWCA += '+ (1.0 - step(x-0.5))*lambdaWCA*(4.0*lambdaLJ*epsilon*x*(x-1.0));'
    softCoreFunctionWCA += 'x = (1.0/reff_sterics);'
    softCoreFunctionWCA += 'reff_sterics = (0.5*(1.0-lambdaLJ) + ((r/sigma)^6));'
    softCoreFunctionWCA += 'sigma=0.5*(sigma1+sigma2); epsilon = sqrt(epsilon1*epsilon2)'
    #Define the system force for this function and its parameters
    SoftCoreForceWCA = mm.CustomNonbondedForce(softCoreFunctionWCA)
    SoftCoreForceWCA.addGlobalParameter(
        'lambdaLJ', 1.0
    )  #Throughout, should follow convention that lambdaLJ=1.0 is fully-interacting state
    SoftCoreForceWCA.addGlobalParameter(
        'lambdaWCA',
        1.0)  #When 1, attractions included; setting to 0 turns off attractions
    SoftCoreForceWCA.addPerParticleParameter('sigma')
    SoftCoreForceWCA.addPerParticleParameter('epsilon')

    #Will turn off electrostatics completely in the original non-bonded force
    #In the end-state, only want electrostatics inside the alchemical molecule
    #To do this, just turn ON a custom force as we turn OFF electrostatics in the original force
    ONE_4PI_EPS0 = 138.935456  #in kJ/mol nm/e^2
    soluteCoulFunction = '(1.0-(lambdaQ^2))*ONE_4PI_EPS0*charge/r;'
    soluteCoulFunction += 'ONE_4PI_EPS0 = %.16e;' % (ONE_4PI_EPS0)
    soluteCoulFunction += 'charge = charge1*charge2'
    SoluteCoulForce = mm.CustomNonbondedForce(soluteCoulFunction)
    #Note this lambdaQ will be different than for soft core (it's also named differently, which is CRITICAL)
    #This lambdaQ corresponds to the lambda that scales the charges to zero
    #To turn on this custom force at the same rate, need to multiply by (1.0-lambdaQ**2), which we do
    SoluteCoulForce.addGlobalParameter('lambdaQ', 1.0)
    SoluteCoulForce.addPerParticleParameter('charge')

    #Also create custom force for intramolecular alchemical LJ interactions
    #Could include with electrostatics, but nice to break up
    #We could also do this with a separate NonbondedForce object, but it would be a little more work, actually
    soluteLJFunction = '4.0*epsilon*x*(x-1.0); x = (sigma/r)^6;'
    soluteLJFunction += 'sigma=0.5*(sigma1+sigma2); epsilon=sqrt(epsilon1*epsilon2)'
    SoluteLJForce = mm.CustomNonbondedForce(soluteLJFunction)
    SoluteLJForce.addPerParticleParameter('sigma')
    SoluteLJForce.addPerParticleParameter('epsilon')

    #Loop over all particles and add to custom forces
    #As we go, will also collect full charges on the solute particles
    #AND we will set up the solute-solute interaction forces
    #Map each solute atom index to its position in soluteIndices for constant-time lookup
    soluteSlot = {ind: k for k, ind in enumerate(soluteIndices)}
    alchemicalCharges = [None] * len(soluteIndices)
    for ind in range(systemRef.getNumParticles()):
        #Get current parameters in non-bonded force
        [charge, sigma, epsilon] = NBForce.getParticleParameters(ind)
        #Make sure that sigma is not set to zero! Fine for some ways of writing LJ energy, but NOT OK for soft-core!
        if sigma / u.nanometer == 0.0:
            newsigma = 0.3 * u.nanometer  #This 0.3 is what's used by GROMACS as a default value for sc-sigma
        else:
            newsigma = sigma
        #Add the particle to the soft-core force (do for ALL particles)
        SoftCoreForceWCA.addParticle([newsigma, epsilon])
        #Also add the particle to the solute only forces
        SoluteCoulForce.addParticle([charge])
        SoluteLJForce.addParticle([sigma, epsilon])
        #If the particle is in the alchemical molecule, need to set its LJ interactions to zero in original force
        if ind in alchemicalParticles:
            NBForce.setParticleParameters(ind, charge, sigma, epsilon * 0.0)
            #And keep track of full charge so we can scale it right by lambda
            alchemicalCharges[soluteSlot[ind]] = charge

    #Now we need to handle exceptions carefully
    for ind in range(NBForce.getNumExceptions()):
        #Only the particle pair is needed; the exception parameters stay in NBForce
        p1, p2 = NBForce.getExceptionParameters(ind)[:2]
        #For consistency, must add exclusions where we have exceptions for custom forces
        SoftCoreForceWCA.addExclusion(p1, p2)
        SoluteCoulForce.addExclusion(p1, p2)
        SoluteLJForce.addExclusion(p1, p2)

    #Only compute interactions between the alchemical and other particles for the soft-core force
    SoftCoreForceWCA.addInteractionGroup(alchemicalParticles,
                                         chemicalParticles)

    #And only compute alchemical/alchemical interactions for other custom forces
    SoluteCoulForce.addInteractionGroup(alchemicalParticles,
                                        alchemicalParticles)
    SoluteLJForce.addInteractionGroup(alchemicalParticles, alchemicalParticles)

    #Set other soft-core parameters as needed
    SoftCoreForceWCA.setCutoffDistance(12.0 * u.angstroms)
    SoftCoreForceWCA.setNonbondedMethod(mm.CustomNonbondedForce.CutoffPeriodic)
    SoftCoreForceWCA.setUseLongRangeCorrection(False)
    systemRef.addForce(SoftCoreForceWCA)

    #Set other parameters as needed - note that for the solute force would like to set no cutoff
    #However, OpenMM won't allow a bunch of potentials with cutoffs then one without...
    #So as long as the solute is smaller than the cut-off, won't have any problems!
    SoluteCoulForce.setCutoffDistance(12.0 * u.angstroms)
    SoluteCoulForce.setNonbondedMethod(mm.CustomNonbondedForce.CutoffPeriodic)
    SoluteCoulForce.setUseLongRangeCorrection(False)
    systemRef.addForce(SoluteCoulForce)

    SoluteLJForce.setCutoffDistance(12.0 * u.angstroms)
    SoluteLJForce.setNonbondedMethod(mm.CustomNonbondedForce.CutoffPeriodic)
    SoluteLJForce.setUseLongRangeCorrection(False)
    systemRef.addForce(SoluteLJForce)

    #Need to add integrator and context in order to evaluate potential energies
    #Integrator is arbitrary because won't use it
    integrator = mm.VerletIntegrator(1.0 * u.femtoseconds)
    context = mm.Context(systemRef, integrator, platform, prop)

    ##########################################################################

    print("\nStarting analysis...")

    kBT = u.AVOGADRO_CONSTANT_NA * u.BOLTZMANN_CONSTANT_kB * temperature

    #Set up array to hold potential energies, one row per frame
    #First column will be coupled, then no electrostatics, then no electrostatics with WCA, then decoupled
    allU = np.array([[]] * 4).T

    #Each state is [lambdaLJ, lambdaWCA, lambdaQ], in the same order as the columns of allU
    lambdaStates = [
        [1.0, 1.0, 1.0],  #Fully coupled
        [1.0, 1.0, 0.0],  #Charge turned off
        [1.0, 0.0, 0.0],  #Charge turned off, attractions turned off (so WCA)
        [0.0, 1.0, 0.0],  #Decoupled (WCA still included, though doesn't matter)
    ]

    #We've now set everything up like we're going to run a simulation
    #But now we will use pytraj to load a trajectory to get coordinates
    #With those coordinates, we will evaluate the energies we want
    #Just need to figure out if we have a surface or a bulk system
    trajFiles = glob.glob('Quad*/%s' % trajFile)
    if len(trajFiles) == 0:
        trajFiles = [trajFile]

    print("Using following trajectory files: %s" % str(trajFiles))

    for aFile in trajFiles:

        trajtop = copy.deepcopy(top)
        trajtop.rb_torsions = pmd.TrackedList(
            [])  #Necessary for SAM systems so doesn't break pytraj
        trajtop = pt.load_parmed(trajtop, traj=False)
        traj = pt.iterload(aFile, top=trajtop, frame_slice=(startFrame, -1))

        thisAllU = np.zeros((len(traj), len(lambdaStates)))

        #Loop over the lambda states of interest, looping over whole trajectory each time
        for i, lstate in enumerate(lambdaStates):

            #Set the lambda state
            context.setParameter('lambdaLJ', lstate[0])
            context.setParameter('lambdaWCA', lstate[1])
            context.setParameter('lambdaQ', lstate[2])
            #Scale the solute charges in the original force from their stored full values
            for k, ind in enumerate(soluteIndices):
                [charge, sig, eps] = NBForce.getParticleParameters(ind)
                NBForce.setParticleParameters(ind,
                                              alchemicalCharges[k] * lstate[2],
                                              sig, eps)
            NBForce.updateParametersInContext(context)

            #And loop over trajectory
            for t, frame in enumerate(traj):

                #Only the three box lengths are used, so the box is set up as orthorhombic
                thisbox = np.array(frame.box.values[:3])
                context.setPeriodicBoxVectors(
                    np.array([thisbox[0], 0.0, 0.0]) * u.angstrom,
                    np.array([0.0, thisbox[1], 0.0]) * u.angstrom,
                    np.array([0.0, 0.0, thisbox[2]]) * u.angstrom)

                thispos = np.array(frame.xyz) * u.angstrom
                context.setPositions(thispos)

                thisAllU[t, i] = context.getState(
                    getEnergy=True).getPotentialEnergy() / kBT

        #Add this trajectory information
        allU = np.vstack((allU, thisAllU))

    #And that should be it, just need to save files and print information
    np.savetxt(
        'pot_energy_decomp.txt',
        allU,
        header=
        'U_coupled (kBT)    U_noQ (kBT)    U_noQ_WCA (kBT)    U_decoupled (kBT) '
    )

    #Print some meaningful information
    #Just make sure we do so as accurately as possible using alchemical information
    alchFiles = glob.glob('alchemical_U*.txt')
    if len(alchFiles) == 0:
        raise IOError(
            "No file matching 'alchemical_U*.txt' found in the current directory."
        )
    alchFile = alchFiles[0]
    print('Using alchemical information file: %s' % alchFile)
    printInfo(allU, mbarFile='mbar_object.pkl', alchFile=alchFile)
def main(args):
    """Set up and run umbrella-sampling simulations of a solute pulled towards a surface.

    Builds an OpenMM system from the topology/structure files, freezes the
    surface 'SU' atoms, adds a bond-repulsion term between the solute 'oh'
    atoms and a harmonic restraint on the z-distance between the solute heavy
    atoms and the surface. It then runs an NVT equilibration, a pulling
    simulation, and one NVT/NPT/production sequence per umbrella window, each
    inside its own 'umbrella<i>' directory.

    Parameters
    ----------
    args : sequence of str
        args[0] - topology file
        args[1] - structure file (supplies the unit cell and start positions)

    Notes
    -----
    Exits with status 2 if either file is missing from args. Raises
    RuntimeError if the system has no NonbondedForce. Reads 'pull_restraint.txt'
    and 'pull.nc' from the current directory after the pulling simulation.
    """
    #Get the structure and topology files from the command line
    #ParmEd accepts a wide range of file types (Amber, GROMACS, CHARMM, OpenMM... but not LAMMPS)
    try:
        topFile = args[0]
        strucFile = args[1]
    except IndexError:
        print("Specify topology and structure files from the command line.")
        sys.exit(2)

    print("Using topology file: %s" % topFile)
    print("Using structure file: %s" % strucFile)

    print("\nSetting up system...")

    #Load in the files for initial simulations
    top = pmd.load_file(topFile)
    struc = pmd.load_file(strucFile)

    #Transfer unit cell information to topology object
    top.box = struc.box[:]

    #Set up some global features to use in all simulations
    temperature = 298.15 * u.kelvin

    #Define the platform (i.e. hardware and drivers) to use for running the simulation
    #This can be CUDA, OpenCL, CPU, or Reference
    #CUDA is for NVIDIA GPUs
    #OpenCL is for CPUs or GPUs, but must be used for old CPUs (not SSE4.1 compatible)
    #CPU only allows single precision (CUDA and OpenCL allow single, mixed, or double)
    #Reference is a clear, stable reference for other code development and is very slow, using double precision by default
    platform = mm.Platform.getPlatformByName('CUDA')
    prop = {  #'Threads': '2', #number of threads for CPU - all definitions must be strings (I think)
        'Precision':
        'mixed',  #for CUDA or OpenCL, select the precision (single, mixed, or double)
        'DeviceIndex':
        '0',  #selects which GPUs to use - set this to zero if using CUDA_VISIBLE_DEVICES
        'DeterministicForces':
        'True'  #Makes sure forces with CUDA and PME are deterministic
    }

    #Create the OpenMM system that can be used as a reference
    systemRef = top.createSystem(
        nonbondedMethod=app.
        PME,  #Uses PME for long-range electrostatics, simple cut-off for LJ
        nonbondedCutoff=12.0 *
        u.angstroms,  #Defines cut-off for non-bonded interactions
        rigidWater=True,  #Use rigid water molecules
        constraints=app.HBonds,  #Constrains all bonds involving hydrogens
        flexibleConstraints=
        False,  #Whether to include energies for constrained DOFs
        removeCMMotion=
        False,  #Whether or not to remove COM motion (don't want to if part of system frozen)
    )

    #Set up the integrator to use as a reference
    integratorRef = mm.LangevinIntegrator(
        temperature,  #Temperature for Langevin
        1.0 / u.picoseconds,  #Friction coefficient
        2.0 * u.femtoseconds,  #Integration timestep
    )
    integratorRef.setConstraintTolerance(1.0E-08)

    #To freeze atoms, set mass to zero (does not apply to virtual sites, termed "extra particles" in OpenMM)
    #Here assume (correctly, I think) that the topology indices for atoms correspond to those in the system
    #Compare with == rather than "in ('SU')": the latter is a substring test on the
    #string 'SU' and would also freeze atoms of type 'S', 'U', or ''
    for i, atom in enumerate(top.atoms):
        if atom.type == 'SU':  #could extend to 'CU', 'CUO'
            systemRef.setParticleMass(i, 0 * u.dalton)

    #Track non-bonded force, mainly to turn off dispersion correction
    NBForce = None
    for frc in systemRef.getForces():
        if (isinstance(frc, mm.NonbondedForce)):
            NBForce = frc
    if NBForce is None:
        raise RuntimeError(
            "No NonbondedForce found in the system created from %s" % topFile)

    #Turn off dispersion correction since have interface
    NBForce.setUseDispersionCorrection(False)

    #Get solute atoms and solute heavy atoms separately
    #Every residue NOT named below is treated as part of the solute
    soluteIndices = []
    heavyIndices = []
    for res in top.residues:
        if res.name not in ['OTM', 'CTM', 'STM', 'NTM', 'SOL']:
            for atom in res.atoms:
                soluteIndices.append(atom.idx)
                if 'H' not in atom.name[0]:
                    heavyIndices.append(atom.idx)

    #JUST for boric acid, add a custom bonded force
    #Couldn't find a nice, compatible force field, but did find A forcefield, so using it
    #But has no angle terms on O-B-O and instead a weird bond repulsion term
    #This term also prevents out of plane bending
    #Simple in our case because boric acid is symmetric, so only need one parameter
    #Parameters come from Otkidach and Pletnev, 2001
    #Here, Ad = (A^2) / (d^6) since Ai and Aj and di and dj are all the same
    #In the original paper, B-OH bond had A = 1.72 and d = 0.354
    #Note that d is dimensionless and A should have units of (Angstrom^3)*(kcal/mol)^(1/2)
    #These units are inferred just to make things work out with kcal/mol and the given distance dependence
    bondRepulsionFunction = 'Ad*(1.0/r)^6'
    BondRepulsionForce = mm.CustomBondForce(bondRepulsionFunction)
    BondRepulsionForce.addPerBondParameter(
        'Ad')  #Units are technically kJ/mol * nm^6
    baOxInds = [aind for aind in soluteIndices
                if top.atoms[aind].type == 'oh']
    #Add the repulsion once for every unique pair of solute 'oh' atoms
    for i in range(len(baOxInds)):
        for j in range(i + 1, len(baOxInds)):
            BondRepulsionForce.addBond(baOxInds[i], baOxInds[j], [0.006289686])

    systemRef.addForce(BondRepulsionForce)

    #Also get surface SU atoms
    surfIndices = []
    for atom in top.atoms:
        if atom.type == 'SU':
            surfIndices.append(atom.idx)

    startPos = np.array(struc.positions.value_in_unit(u.nanometer))

    print("\nSolute indices: %s" % str(soluteIndices))
    print("Solute heavy atom indices: %s" % str(heavyIndices))
    print("Surface SU atom indices: %s" % str(surfIndices))

    #Solute should already be placed far from the surface
    #If this is not done right, or it is too close to half the periodic box distance, will have issues
    #Either way, set this as the starting reference z distance
    initRefZ = np.average(startPos[heavyIndices, 2]) - np.average(
        startPos[surfIndices, 2])

    print(initRefZ)

    #Will now add a custom bonded force between solute heavy atoms and surface SU atoms
    #Should be in units of kJ/mol*nm^2, but should check this
    #For expanded ensemble, fine to assume solute is less than half the box distance from the surface
    #But for umbrella sampling, want to apply pulling towards surface regardless of whether pull from above or below
    #Also allows us to get further from the surface with our umbrellas without worrying about PBCs
    restraintExpression = '0.5*k*(abs(z2 - z1) - refZ)^2'
    restraintForce = mm.CustomCentroidBondForce(2, restraintExpression)
    restraintForce.addPerBondParameter('k')
    #refZ is the only global parameter, so it has index 0 (used when setting umbrella centers below)
    restraintForce.addGlobalParameter(
        'refZ', initRefZ)  #Make global so can modify during simulation
    restraintForce.addGroup(surfIndices, np.ones(
        len(surfIndices)))  #Don't weight with masses
    restraintForce.addGroup(heavyIndices, np.ones(len(heavyIndices)))
    restraintForce.addBond([0, 1], [1000.0])
    restraintForce.setUsesPeriodicBoundaryConditions(
        True)  #Only when doing umbrella sampling
    systemRef.addForce(restraintForce)

    forceLabelsRef = getForceLabels(systemRef)

    decompEnergy(systemRef,
                 struc.positions,
                 labels=forceLabelsRef,
                 verbose=False)

    #Do NVT simulation
    stateFileNVT1, stateNVT1 = doSimNVT(top,
                                        systemRef,
                                        integratorRef,
                                        platform,
                                        prop,
                                        temperature,
                                        pos=struc.positions)

    #Do pulling simulation - really I'm just slowly changing the equilibrium bond distance between the surface and the solute
    stateFilePull, statePull = doSimPull(top,
                                         systemRef,
                                         integratorRef,
                                         platform,
                                         prop,
                                         temperature,
                                         state=stateFileNVT1)

    decompEnergy(systemRef, statePull, labels=forceLabelsRef, verbose=False)

    #Load in pulling restraint data and pulling trajectory to identify starting structures for each umbrella
    #The code below reads column 0 as time, and columns 1 and 2 as z-distances
    #NOTE(review): presumably column 1 is the restraint reference and column 2 the actual distance - confirm against doSimPull
    pullData = np.loadtxt('pull_restraint.txt')
    trajtop = copy.deepcopy(top)
    trajtop.rb_torsions = pmd.TrackedList([])
    trajtop = pt.load_parmed(trajtop, traj=False)
    pulltraj = pt.iterload('pull.nc', trajtop)
    frameTimes = np.array([frame.time for frame in pulltraj])

    #Define some umbrella distances based on a fixed spacing between initial and final pulling coordinate
    #Actually for final pulling coordinate, close to the surface, using average of actual coordinate and reference
    zSpace = 0.1  #nm
    zUmbs = np.arange(0.5 * (pullData[-1, 1] + pullData[-1, 2]),
                      pullData[0, 2], zSpace)

    print("\nUsing following umbrellas:")
    print(zUmbs)

    #Then loop over umbrellas and run simulation for each
    for i, zRefDist in enumerate(zUmbs):

        #Each umbrella runs in its own directory; os.mkdir fails if it already exists
        os.mkdir("umbrella%i" % i)
        os.chdir("umbrella%i" % i)

        try:
            #Find where in the pulling trajectory the solute came closest to this umbrella
            pullDatInd = np.argmin(abs(pullData[:, 2] - zRefDist))
            frameInd = np.argmin(abs(frameTimes - pullData[pullDatInd, 0]))

            #Get starting coordinates
            #Making sure to assign the units that will be returned by pytraj
            thisCoords = np.array(pulltraj[frameInd].xyz) * u.angstrom

            #Set reference value for harmonic force
            restraintForce.setGlobalParameterDefaultValue(0, zRefDist)

            print("\nUmbrella %i:" % i)
            print("\tReference distance: %f" % zRefDist)
            print("\tFrame chosen from trajectory: %i (%f ps)" %
                  (frameInd, frameTimes[frameInd]))

            #And run simulations, first equilibrating in NVT, then NPT, then production in NPT
            stateFileNVT, stateNVT = doSimNVT(top,
                                              systemRef,
                                              integratorRef,
                                              platform,
                                              prop,
                                              temperature,
                                              pos=thisCoords)

            stateFileNPT, stateNPT = doSimNPT(top,
                                              systemRef,
                                              integratorRef,
                                              platform,
                                              prop,
                                              temperature,
                                              state=stateFileNVT)

            stateFileProd, stateProd = doSimUmbrella(top,
                                                     systemRef,
                                                     integratorRef,
                                                     platform,
                                                     prop,
                                                     temperature,
                                                     state=stateFileNPT)
        finally:
            #Always return to the parent directory, even if a simulation fails
            os.chdir("../")
Exemple #9
0
def main(args):
    """Estimate hard-sphere solute insertion statistics from a trajectory.

    The solute is every residue whose name is not one of
    ``OTM``, ``CTM``, ``STM``, ``NTM`` or ``SOL``; atoms of those residues are
    the "other" atoms and are grouped by atom type. For every
    (solute atom, other atom type) pair, mixed LJ parameters are built with
    Lorentz-Berthelot combining rules and converted to a hard-sphere radius
    by integrating ``1 - exp(-beta * U_WCA(r))`` over ``r``. For each
    trajectory frame the solute is then placed at 1000 random XY positions
    and the number of distinct other atoms lying within the hard-sphere
    radius of any solute atom is histogrammed.

    Parameters
    ----------
    args: sequence of str
        ``args[0]`` is the topology file (read with ``pmd.load_file``) and
        ``args[1]`` is the trajectory file (read with ``pt.iterload``).

    Returns
    -------
    None
        Results are printed, and the overlap histogram is written to
        ``HS-solute_overlap_hist.txt`` in the current directory.

    Raises
    ------
    KeyError
        If no non-solute atom has the type ``OW_tip4pew``, since the solute
        volume is computed from the radii against that atom type.
    """

    print('\nReading in files and obtaining LJ information...')

    #First read in topology and trajectory
    topFile = args[0]
    trajFile = args[1]

    top = pmd.load_file(topFile)
    trajtop = copy.deepcopy(top)
    trajtop.rb_torsions = pmd.TrackedList(
        [])  #Necessary for SAM systems so doesn't break pytraj
    trajtop = pt.load_parmed(trajtop, traj=False)
    traj = pt.iterload(trajFile, top=trajtop)

    #Next use parmed to get LJ parameters for all atoms in the solute, as well as water oxygens and surface atoms
    #While go, also collect dictionaries of atomic indices associated with each atom type
    #Will have to check separately when looking at overlaps with solute
    soluteLJ = []
    soluteInds = []
    dictOtherLJ = {}
    dictOtherInds = {}
    for res in top.residues:
        if res.name not in ['OTM', 'CTM', 'STM', 'NTM', 'SOL']:
            for atom in res.atoms:
                soluteInds.append(atom.idx)
                soluteLJ.append([atom.sigma, atom.epsilon])
        else:
            for atom in res.atoms:
                dictOtherInds.setdefault(atom.type, []).append(atom.idx)
                #LJ parameters are taken from the first atom seen of each type
                if atom.type not in dictOtherLJ:
                    dictOtherLJ[atom.type] = np.array(
                        [atom.sigma, atom.epsilon])

    soluteLJ = np.array(soluteLJ)

    print(soluteLJ)

    #Use Lorentz-Berthelot combining rules to get LJ parameters between each solute atom and a water oxygen
    #Keys are '<solute atom position>_<other atom type>'
    dictMixLJ = {}
    for i in range(soluteLJ.shape[0]):
        for akey in dictOtherLJ:
            dictMixLJ['%i_%s' % (i, akey)] = np.array([
                0.5 * (soluteLJ[i, 0] + dictOtherLJ[akey][0]),
                np.sqrt(soluteLJ[i, 1] * dictOtherLJ[akey][1])
            ])

    #items() rather than iteritems() so this runs on Python 3 as well as Python 2
    for key, val in dictMixLJ.items():
        print("%s, %s" % (key, str(val.tolist())))

    print(
        '\nDetermining hard-sphere radii for all combinations of solute and other system atoms...'
    )

    #Next compute hard-sphere radii by integrating according to Barker and Hendersen, Weeks, etc.
    #In order to this right, technically using WCA potential, not LJ
    hsRadii = {}
    rvals = np.arange(0.0, 50.005, 0.005)
    #NOTE(review): presumably 1/(kB*T) with kB in kcal/(mol K) at 298.15 K - confirm units
    betaLJ = 1.0 / ((1.9872036E-03) * 298.15)
    for i in range(soluteLJ.shape[0]):
        for akey in dictOtherLJ:
            [thisSig, thisEps] = dictMixLJ['%i_%s' % (i, akey)]
            #No interaction means no excluded volume; also marks pairs to skip later
            if thisEps == 0.0:
                hsRadii['%i_%s' % (i, akey)] = 0.0
                continue
            thisRmin = thisSig * (2.0**(1.0 / 6.0))
            thisSigdRmin6 = (thisSig / thisRmin)**6
            thisPotRmin = 4.0 * thisEps * ((thisSigdRmin6**2) - thisSigdRmin6)
            thisSigdR6 = (thisSig / rvals)**6
            #Shift LJ up by its minimum and truncate at Rmin (WCA form)
            thisPotVals = 4.0 * thisEps * (
                (thisSigdR6**2) - thisSigdR6) - thisPotRmin
            thisPotVals[np.where(rvals >= thisRmin)] = 0.0
            thisPotVals *= betaLJ
            thisIntegrand = 1.0 - np.exp(-thisPotVals)
            #r = 0 divides by zero above and yields NaN, so set the integrand there explicitly
            thisIntegrand[0] = 1.0
            hsRadii['%i_%s' % (i, akey)] = simps(thisIntegrand, rvals)
            #Need to multiply by two because will use atom centers to check overlap? Is Rhs distance between centers?

    for key, val in hsRadii.items():
        print("%s, %f" % (key, val))

    #Keep track of hard-sphere radii with water oxygens specially
    solOWhsRadii = np.zeros(soluteLJ.shape[0])
    for i in range(soluteLJ.shape[0]):
        solOWhsRadii[i] = hsRadii['%i_OW_tip4pew' %
                                  i]  #Only using TIP4P/EW here

    print('\nStarting loop over trajectory...')

    #Now loop over trajectory and check if solute is overlapping with any waters OR surface atoms
    #Will create a distribution of overlapping atoms
    numOverlap = np.arange(101.0)
    countOverlap = np.zeros(len(numOverlap))

    #And also track average solute volume that we're trying to insert
    solVol = 0.0

    #Number of random XY placements of the solute tried per frame
    numRandXY = 1000

    countFrames = 0
    print('')

    for frame in traj:

        if countFrames % 100 == 0:
            print("On frame %i" % countFrames)

        countFrames += 1

        boxDims = np.array(frame.box.values[:3])

        currCoords = np.array(frame.xyz)

        #Get solute coordinates and make sure solute is whole
        #Then get solute coordinates relative to first solute atom
        #Don't need any other wrapping because wl.nearneighbors will do its own wrapping
        solCoords = currCoords[soluteInds]
        solRefCoords = wl.reimage(solCoords, solCoords[0],
                                  boxDims) - solCoords[0]

        #While we have the coordinates nice, compute solute volume
        #Do this based on hard-sphere radii to water oxygens, which is the most likely case anyway
        solVol += np.sum(wl.spherevolumes(solRefCoords, solOWhsRadii, 0.1))

        #Will shift the solute first atom (and others too) to a number of random locations
        #Since applying restraint (if have surface) Z is drawn from the distribution we want
        #X and Y can be drawn more easily from uniform distributions, so do this to randomize solute position
        randX = np.random.random(numRandXY) * boxDims[0]
        randY = np.random.random(numRandXY) * boxDims[1]

        #And will keep track of number of overlapping atoms for each solute random position
        thisTotOverlap = np.zeros(numRandXY, dtype=int)

        #Loop over all non-solute atoms in the system by atom type
        for akey, val in dictOtherInds.items():

            #For this specific atom type, need to keep track of WHICH neighbors overlapping
            #And need to do for EACH solute atom
            #Don't want to double-count if two solute atoms both overlap the same other atom
            overlapBool = np.zeros((numRandXY, len(val)), dtype=int)

            #Now loop over each solute atom
            for i, coord in enumerate(solRefCoords):

                thisRadius = hsRadii['%i_%s' % (i, akey)]
                if thisRadius == 0.0:
                    continue

                #Define coordinates of the current solute atom we're working with
                #So setting first atom to random XY position, then shifting by distance to first atom
                hsCoords = np.zeros((numRandXY, 3))
                hsCoords[:, 0] = coord[0] + randX
                hsCoords[:, 1] = coord[1] + randY
                hsCoords[:, 2] = coord[2] + solCoords[0, 2]

                #Identify boolean for overlapping atoms and add to overall boolean for overlap
                #Note that want OR operation, so adding boolean arrays
                overlapBool += wl.nearneighbors(hsCoords, currCoords[val],
                                                boxDims, 0.0, thisRadius)

            #For this non-solute atom type, add number of atoms overlapping with ANY solute atom
            thisTotOverlap += np.sum(np.array(overlapBool, dtype=bool), axis=1)

        #Histogram this frame's overlap counts
        #Grow the histogram if a placement overlaps more atoms than there are bins,
        #instead of failing with an IndexError
        thisCounts = np.bincount(thisTotOverlap)
        if len(thisCounts) > len(countOverlap):
            countOverlap = np.concatenate(
                (countOverlap, np.zeros(len(thisCounts) - len(countOverlap))))
            numOverlap = np.arange(float(len(countOverlap)))
        countOverlap[:len(thisCounts)] += thisCounts

    print(countOverlap.tolist())
    #NOTE: the value printed is -ln(fraction of placements with zero overlaps), not the fraction itself
    print('Hard-sphere solute insertion probability: %f' %
          (-np.log(countOverlap[0] / np.sum(countOverlap))))

    #Save the distribution to file
    np.savetxt(
        'HS-solute_overlap_hist.txt',
        np.vstack((numOverlap, countOverlap)).T,
        header=
        'Number of non-solute atoms overlapping           Histogram count')

    solVol /= float(len(traj))
    print(
        'Average solute hard-sphere volume (based on water oxygen LJ params): %f'
        % (solVol))
Exemple #10
0
def static_DAT_restraint(
    restraint_mask_list,
    num_window_list,
    ref_structure,
    force_constant,
    continuous_apr=True,
    amber_index=False,
):
    """
    Create a static restraint, i.e. one whose target and force constant are
    the same at the start and end of every phase.

    The target is measured on the first frame of the reference structure:
    a distance for two masks, an angle for three, a dihedral for four.

    Parameters
    ----------
    restraint_mask_list: list of str
        Two, three or four masks defining the restraint.
    num_window_list: list
        Number of windows as ``[attach, pull, release]``. A phase whose entry
        is ``None`` or ``0`` is left unconfigured.
    ref_structure: str or ``pmd.structure.Structure``
        Reference structure. A string is passed to
        ``utils.return_parmed_structure``.
    force_constant: float
        Force constant used for every configured phase.
    continuous_apr: bool
        Stored on the restraint as ``continuous_apr``.
    amber_index: bool
        Stored on the restraint as ``amber_index``.

    Returns
    -------
    rest: ``DAT_restraint``
        The restraint, after ``initialize()`` has been called on it.

    Raises
    ------
    Exception
        If ``ref_structure`` has an unsupported type, ``num_window_list``
        does not have three entries, or the number of masks is not 2, 3 or 4.
    """

    # Setup reference structure
    if isinstance(ref_structure, str):
        ref_structure = utils.return_parmed_structure(ref_structure)
    elif not isinstance(ref_structure, pmd.structure.Structure):
        # str() is required: concatenating the type object itself raises
        # TypeError and hides this message.
        raise Exception(
            "static_DAT_restraint does not support the type associated with ref_structure:"
            + str(type(ref_structure)))
    ref_traj = pt.load_parmed(ref_structure, traj=True)

    # Check num_window_list
    if len(num_window_list) != 3:
        raise Exception(
            "The num_window_list needs to contain three integers corresponding to the number of windows in the attach, pull, and release phase, respectively"
        )

    # Check the mask count before touching the restraint, so a bad list
    # gives the descriptive error rather than an IndexError below.
    num_masks = len(restraint_mask_list)
    if num_masks not in (2, 3, 4):
        raise Exception(
            "The number of masks (" + str(num_masks) +
            ") in restraint_mask_list is not 2, 3, or 4 and thus is not one of the supported types: distance, angle, dihedral"
        )

    # Setup restraint
    rest = DAT_restraint()
    rest.continuous_apr = continuous_apr
    rest.amber_index = amber_index
    rest.topology = ref_structure
    rest.mask1 = restraint_mask_list[0]
    rest.mask2 = restraint_mask_list[1]
    if num_masks >= 3:
        rest.mask3 = restraint_mask_list[2]
    if num_masks == 4:
        rest.mask4 = restraint_mask_list[3]

    # Target value, taken from the first frame of the reference trajectory
    mask_string = " ".join(restraint_mask_list)
    if num_masks == 2:
        # Distance restraint
        target = pt.distance(ref_traj, mask_string)[0]
    elif num_masks == 3:
        # Angle restraint
        target = pt.angle(ref_traj, mask_string)[0]
    else:
        # Dihedral restraint
        target = pt.dihedral(ref_traj, mask_string)[0]

    # Attach phase
    if num_window_list[0] is not None and num_window_list[0] != 0:
        rest.attach["target"] = target
        rest.attach["fc_initial"] = force_constant
        rest.attach["fc_final"] = force_constant
        rest.attach["num_windows"] = num_window_list[0]

    # Pull phase
    if num_window_list[1] is not None and num_window_list[1] != 0:
        rest.pull["fc"] = force_constant
        rest.pull["target_initial"] = target
        rest.pull["target_final"] = target
        rest.pull["num_windows"] = num_window_list[1]

    # Release phase
    if num_window_list[2] is not None and num_window_list[2] != 0:
        rest.release["target"] = target
        rest.release["fc_initial"] = force_constant
        rest.release["fc_final"] = force_constant
        rest.release["num_windows"] = num_window_list[2]

    rest.initialize()

    return rest