def run_mbar(self, test_overlap=True):
    r"""Run the MBAR free energy estimate over all lambda states.

    Populates self._f_k, self._deltaF_mbar, self._dDeltaF_mbar,
    self._pmf_mbar, self._pairwise_F and, optionally, self._overlap_matrix.

    Parameters
    ----------
    test_overlap : bool, optional, default=True
        If True, also compute and store the MBAR overlap matrix.
    """
    MBAR_obj = MBAR(self._u_kln, self._N_k, verbose=True)
    self._f_k = MBAR_obj.f_k
    # pymbar API difference across versions: some need return_theta=True to
    # return the theta matrix. BUG FIX: the original used a bare `except:`,
    # which would also hide unrelated failures (e.g. KeyboardInterrupt);
    # catch only the signature/return-shape errors the fallback addresses.
    try:
        (deltaF_ij, dDeltaF_ij, theta_ij) = MBAR_obj.getFreeEnergyDifferences()
    except (TypeError, ValueError):
        (deltaF_ij, dDeltaF_ij, theta_ij) = MBAR_obj.getFreeEnergyDifferences(return_theta=True)
    nlambda = self._lambda_array.shape[0]
    # End-to-end free energy difference (first -> last lambda state), in kT.
    self._deltaF_mbar = deltaF_ij[0, nlambda - 1]
    self._dDeltaF_mbar = dDeltaF_ij[0, nlambda - 1]
    # PMF columns: lambda, reduced free energy, uncertainty vs. state 0.
    self._pmf_mbar = numpy.zeros(shape=(nlambda, 3))
    self._pmf_mbar[:, 0] = self._lambda_array
    self._pmf_mbar[:, 1] = self._f_k
    self._pmf_mbar[:, 2] = dDeltaF_ij[0]
    # Pairwise free energy differences between adjacent lambda windows
    # (first superdiagonal of the difference matrices).
    self._pairwise_F = numpy.zeros(shape=(nlambda - 1, 4))
    self._pairwise_F[:, 0] = self._lambda_array[:-1]
    self._pairwise_F[:, 1] = self._lambda_array[1:]
    self._pairwise_F[:, 2] = numpy.diag(deltaF_ij, 1)
    self._pairwise_F[:, 3] = numpy.diag(dDeltaF_ij, 1)
    # Testing data overlap between states.
    if test_overlap:
        overlap_matrix = MBAR_obj.computeOverlap()
        self._overlap_matrix = overlap_matrix[2]
def calcTension(energy_data, verbose=False):
    """Estimate the surface tension (J/m^2) from two perturbed-area energies.

    energy_data columns: [reference, area-perturbed state 1, state 2].
    Uses MBAR over the reduced energy gaps; note that `da` (module-level)
    already carries a factor of two for the two interfaces.
    """
    # Energy gaps relative to the reference, reduced by kT (kJ/mol).
    dE1 = energy_data[:, 1] - energy_data[:, 0]
    dE2 = energy_data[:, 2] - energy_data[:, 0]
    BdE1 = dE1 / kTkJmol
    BdE2 = dE2 / kTkJmol

    nstates = 2
    nframes = len(dE1)
    u_kln = np.zeros([nstates, nstates, nframes], np.float64)
    u_kln[0, 1, :] = BdE1
    u_kln[1, 0, :] = BdE2

    # Keep only uncorrelated equilibrium samples from each state.
    N_k = np.zeros([nstates], np.int32)
    for k in range(nstates):
        nequil, g, Neff_max = timeseries.detectEquilibration(u_kln[k, k, :])
        indices = timeseries.subsampleCorrelatedData(u_kln[k, k, :], g=g)
        N_k[k] = len(indices)
        u_kln[k, :, 0:N_k[k]] = u_kln[k, :, indices].T
    if verbose:
        print("...found {} uncorrelated samples out of {} total samples...".
              format(N_k, nframes))

    if verbose:
        print("=== Computing free energy differences ===")
    mbar = MBAR(u_kln, N_k)
    DeltaF_ij, dDeltaF_ij, Theta_ij = mbar.getFreeEnergyDifferences()

    # Convert kT per area to J/m^2 (1e18 converts nm^2 -> m^2).
    tension = DeltaF_ij[0, 1] / da * 1e18 * kT
    tensionError = dDeltaF_ij[0, 1] / da * 1e18 * kT
    if verbose:
        print('tension (pymbar): {} +/- {}N/m'.format(tension, tensionError))
    return tension, tensionError
def update_logZ_with_mbar(self):
    """Use MBAR over the stored history to update the logZ estimates.

    Reads 'u_k' and 'state_index' from the attached NetCDF file, runs MBAR,
    and stores -f_k (referenced to state 0) into self.logZ. Records the
    elapsed time in self._timing['MBAR time'].

    Raises
    ------
    Exception
        If no NetCDF storage file is attached, or the sampler's update
        scheme is not 'global-jump' (the only supported scheme).
    """
    # BUG FIX: the original checked `self.ncfile` twice with two different
    # messages; a single guard suffices.
    if not self.ncfile:
        raise Exception("Cannot update logZ using MBAR since no NetCDF file is storing history.")
    if not self.sampler.update_scheme == 'global-jump':
        raise Exception("Only global jump is implemented right now.")

    # Extract relative energies.
    if self.verbose:
        print('Updating logZ estimate with MBAR...')
    initial_time = time.time()
    from pymbar import MBAR
    #first = int(self.iteration / 2)
    first = 0
    u_kn = np.array(self.ncfile.variables['u_k'][first:, :]).T
    # Histogram the visited state indices to obtain N_k; the bin edges are
    # offset by 0.5 so each integer state index falls in its own bin.
    [N_k, bins] = np.histogram(self.ncfile.variables['state_index'][first:],
                               bins=(np.arange(self.sampler.nstates + 1) - 0.5))
    mbar = MBAR(u_kn, N_k)
    Deltaf_ij, dDeltaf_ij, Theta_ij = mbar.getFreeEnergyDifferences(
        compute_uncertainty=True, uncertainty_method='approximate')
    # logZ is the negative reduced free energy, shifted so logZ[0] == 0.
    self.logZ[:] = -mbar.f_k[:]
    self.logZ -= self.logZ[0]
    final_time = time.time()
    elapsed_time = final_time - initial_time
    self._timing['MBAR time'] = elapsed_time
    if self.verbose:
        print('MBAR time %8.3f s' % elapsed_time)
def calc_df(u_kln):
    """Compute free energy differences between all states via MBAR.

    u_kln must be (nstates) x (nstates) x (nframes) and already reduced by
    kT, where u_kln[k, l, n] is frame n of the trajectory run at state k,
    re-evaluated with the parameters of state l.

    Note: u_kln is subsampled to uncorrelated frames in place.
    Returns (DeltaF_ij, dDeltaF_ij).
    """
    dims = u_kln.shape
    if dims[0] != dims[1]:
        raise ValueError(
            "dimensions {} of u_kln should be square in the first two indices".
            format(dims))
    nstates = dims[0]

    # Number of uncorrelated samples retained from each state.
    N_k = np.zeros([nstates], np.int32)
    for k in range(nstates):
        nequil, g, Neff_max = timeseries.detectEquilibration(u_kln[k, k, :])
        keep = timeseries.subsampleCorrelatedData(u_kln[k, k, :], g=g)
        N_k[k] = len(keep)
        u_kln[k, :, 0:N_k[k]] = u_kln[k, :, keep].T

    # Compute free energy differences and statistical uncertainties.
    mbar = MBAR(u_kln, N_k)
    DeltaF_ij, dDeltaF_ij, Theta_ij = mbar.getFreeEnergyDifferences()

    # save data?
    return DeltaF_ij, dDeltaF_ij
def sqdeltaW(self, mu_VLE, eps_scaled):
    '''
    Return the squared difference between the summed MBAR weights in the
    vapor and liquid phases at each VLE temperature.

    Side effects: stores the optimal reduced free energies as f_k_guess for
    future iterations, and stores mbar, sumWliq, sumWvap for computing VLE
    properties once converged.
    '''
    nTsim = self.nTsim
    U_flat = self.U_flat
    Nmol_flat = self.Nmol_flat
    Ncut = self.Ncut
    f_k_guess = self.f_k_guess
    Temp_VLE = self.Temp_VLE
    u_kn_all = self.u_kn_all
    N_k_all = self.N_k_all

    # Fill in reduced energies for each (T, mu) VLE state being scanned;
    # these occupy the rows after the simulated states.
    for jT, (Temp, mu) in enumerate(zip(Temp_VLE, mu_VLE)):
        u_kn_all[nTsim + jT, :] = self.U_to_u(eps_scaled * U_flat, Temp, mu, Nmol_flat)

    mbar = MBAR(u_kn_all, N_k_all, initial_f_k=f_k_guess)

    # Liquid = snapshots with more than Ncut molecules; vapor = the rest.
    sumWliq = np.sum(mbar.W_nk[:, nTsim:][Nmol_flat > Ncut], axis=0)
    sumWvap = np.sum(mbar.W_nk[:, nTsim:][Nmol_flat <= Ncut], axis=0)
    sqdeltaW_VLE = (sumWliq - sumWvap) ** 2

    ### Store previous solutions to speed-up future convergence of MBAR
    Deltaf_ij = mbar.getFreeEnergyDifferences(return_theta=False)[0]
    self.f_k_guess = Deltaf_ij[0, :]
    self.mbar, self.sumWliq, self.sumWvap = mbar, sumWliq, sumWvap

    return sqdeltaW_VLE
def build_MBAR_sim(self):
    '''
    Create an MBAR instance for just the simulated state points.

    Stores on self:
      N_k_sim: number of snapshots from each simulated state point
      u_kn_sim: reduced potential energies for the simulated points only
      sumN_k: total number of snapshots across all simulated points
      f_k_sim: converged reduced free energies for each simulated state
               point (used as the initial guess for non-simulated points)
      mbar_sim: the MBAR object itself
    '''
    Temp_sim, mu_sim = self.Temp_sim, self.mu_sim
    nSnapshots = self.K_sim
    Nmol_flat, U_flat = self.Nmol_flat, self.U_flat

    N_k_sim = np.array(nSnapshots)
    sumN_k = np.sum(N_k_sim)

    # Reduced energy of every snapshot evaluated at each simulated (T, mu).
    u_kn_sim = np.zeros([len(Temp_sim), sumN_k])
    for iT, (Temp, mu) in enumerate(zip(Temp_sim, mu_sim)):
        u_kn_sim[iT] = self.U_to_u(U_flat, Temp, mu, Nmol_flat)

    mbar_sim = MBAR(u_kn_sim, N_k_sim)

    # Free energies referenced to the first simulated state.
    Deltaf_ij = mbar_sim.getFreeEnergyDifferences(return_theta=False)[0]
    f_k_sim = Deltaf_ij[0, :]

    self.u_kn_sim, self.f_k_sim, self.sumN_k, self.N_k_sim, self.mbar_sim = \
        u_kn_sim, f_k_sim, sumN_k, N_k_sim, mbar_sim
def calc_abs_press_int(self, show_plot=True):
    '''
    Fit ln(Xi) with respect to N for the low-density vapor to obtain the
    absolute-pressure integration constant.

    Stores abs_press_int (the fitted intercept), Temp_IG, f_k_IG and
    Nmol_IG on self.
    '''
    Temp_sim = self.Temp_sim
    u_kn_sim, f_k_sim, sumN_k = self.u_kn_sim, self.f_k_sim, self.sumN_k
    nTsim, U_flat, Nmol_flat, Ncut = self.nTsim, self.U_flat, self.Nmol_flat, self.Ncut

    # Lowest temperature simulated at the smallest chemical potential serves
    # as the near-ideal-gas reference.
    Temp_IG = np.min(Temp_sim[self.mu_sim == self.mu_sim.min()])
    mu_IG = np.linspace(2. * self.mu_opt[self.Temp_VLE == Temp_IG],
                        5. * self.mu_opt[self.Temp_VLE == Temp_IG], 10)

    # Append zero-sample ideal-gas states after the simulated ones.
    N_k_all = self.K_sim[:]
    N_k_all.extend([0] * len(mu_IG))
    u_kn_IG = np.zeros([len(mu_IG), sumN_k])
    u_kn_all = np.concatenate((u_kn_sim, u_kn_IG))
    f_k_guess = np.concatenate((f_k_sim, np.zeros(len(mu_IG))))
    for jT, mu in enumerate(mu_IG):
        u_kn_all[nTsim + jT, :] = self.U_to_u(U_flat, Temp_IG, mu, Nmol_flat)

    mbar = MBAR(u_kn_all, N_k_all, initial_f_k=f_k_guess)

    # Vapor-phase weight and weighted average molecule count at each mu.
    sumW_IG = np.sum(mbar.W_nk[:, nTsim:][Nmol_flat < Ncut], axis=0)
    Nmol_IG = np.sum(mbar.W_nk[:, nTsim:][Nmol_flat < Ncut].T
                     * Nmol_flat[Nmol_flat < Ncut], axis=1) / sumW_IG

    ### Store previous solutions to speed-up future convergence of MBAR
    Deltaf_ij = mbar.getFreeEnergyDifferences(return_theta=False)[0]
    f_k_IG = Deltaf_ij[nTsim:, 0]

    # Linear fit of ln(Xi) vs N restricted to the truly dilute points.
    fit = stats.linregress(Nmol_IG[mu_IG < 2. * self.mu_sim.min()],
                           f_k_IG[mu_IG < 2. * self.mu_sim.min()])

    if show_plot:
        Nmol_plot = np.linspace(Nmol_IG.min(), Nmol_IG.max(), 50)
        lnXi_plot = fit.intercept + fit.slope * Nmol_plot
        plt.figure(figsize=[6, 6])
        plt.plot(Nmol_IG, f_k_IG, 'bo', mfc='None', label='MBAR-GCMC')
        plt.plot(Nmol_plot, lnXi_plot, 'k-', label='Linear fit')
        plt.xlabel('Number of Molecules')
        plt.ylabel(r'$\ln(\Xi)$')
        plt.legend()
        plt.show()

    print('Slope for ideal gas is 1, actual slope is: ' + str(fit.slope))
    print('Intercept for absolute pressure is:' + str(fit.intercept))

    self.abs_press_int, self.Temp_IG, self.f_k_IG, self.Nmol_IG = \
        fit.intercept, Temp_IG, f_k_IG, Nmol_IG
def run_mbar(self):
    r"""Run the MBAR free energy estimate and store the PMF over lambda."""
    MBAR_obj = MBAR(self._u_kln, self._N_k, verbose=True)
    self._f_k = MBAR_obj.f_k
    deltaF_ij, dDeltaF_ij, theta_ij = MBAR_obj.getFreeEnergyDifferences()

    # Free energy difference between the first and last lambda states.
    last = self._lambda_array.shape[0] - 1
    self._deltaF_mbar = deltaF_ij[0, last]
    self._dDeltaF_mbar = dDeltaF_ij[0, last]

    # PMF columns: lambda value, reduced free energy.
    self._pmf_mbar = np.zeros(shape=(self._lambda_array.shape[0], 2))
    self._pmf_mbar[:, 0] = self._lambda_array
    self._pmf_mbar[:, 1] = self._f_k
def gather_dg(self, u_kln, nstates):
    """Subsample u_kln to uncorrelated frames and run MBAR.

    Returns the full matrices of free energy differences and their
    statistical uncertainties. Note: u_kln is modified in place by the
    subsampling step.
    """
    # Subsample data to extract uncorrelated equilibrium timeseries.
    N_k = np.zeros([nstates], np.int32)  # uncorrelated samples per state
    for k in range(nstates):
        _, g, __ = timeseries.detectEquilibration(u_kln[k, k, :])
        uncorr = timeseries.subsampleCorrelatedData(u_kln[k, k, :], g=g)
        N_k[k] = len(uncorr)
        u_kln[k, :, 0:N_k[k]] = u_kln[k, :, uncorr].T

    # Compute free energy differences and statistical uncertainties.
    mbar = MBAR(u_kln, N_k)
    DeltaF_ij, dDeltaF_ij, _ = mbar.getFreeEnergyDifferences()
    print("Number of uncorrelated samples per state: {}".format(N_k))
    return DeltaF_ij, dDeltaF_ij
def test_mbar_free_energies():
    """Can MBAR calculate moderately correct free energy differences?"""
    for system_generator in system_generators:
        name, test = system_generator()
        x_n, u_kn, N_k_output, s_n = test.sample(N_k, mode='u_kn')
        eq(N_k, N_k_output)

        mbar = MBAR(u_kn, N_k)
        # The dict and tuple return forms must agree with each other.
        results = mbar.getFreeEnergyDifferences(return_dict=True)
        fe_t, dfe_t = mbar.getFreeEnergyDifferences(return_dict=False)
        fe = results['Delta_f']
        fe_sigma = results['dDelta_f']
        eq(fe, fe_t)
        eq(fe_sigma, dfe_t)

        # Differences relative to the first state.
        fe, fe_sigma = fe[0, 1:], fe_sigma[0, 1:]
        fe0 = test.analytical_free_energies()
        fe0 = fe0[1:] - fe0[0]

        # z-score of the estimate against the analytical values.
        z = (fe - fe0) / fe_sigma
        eq(z / z_scale_factor, np.zeros(len(z)), decimal=0)
def test_exponential_mbar_free_energies():
    """Exponential Distribution Test: can MBAR calculate correct free energy differences?"""
    test = exponential_distributions.ExponentialTestCase(rates)
    x_kn, u_kln, N_k_output = test.sample(N_k, mode='u_kln')
    eq(N_k, N_k_output)

    mbar = MBAR(u_kln, N_k)
    fe, fe_sigma = mbar.getFreeEnergyDifferences()
    # Differences relative to the first state.
    fe, fe_sigma = fe[0, 1:], fe_sigma[0, 1:]

    fe0 = test.analytical_free_energies()
    fe0 = fe0[1:] - fe0[0]

    # z-score of the estimate against the analytical values.
    z = (fe - fe0) / fe_sigma
    eq(z / z_scale_factor, np.zeros(len(z)), decimal=0)
def test_harmonic_oscillators_mbar_free_energies():
    """Harmonic Oscillators Test: can MBAR calculate correct free energy differences?"""
    test = harmonic_oscillators.HarmonicOscillatorsTestCase(O_k, k_k)
    x_kn, u_kln, N_k_output = test.sample(N_k)
    eq(N_k, N_k_output)

    mbar = MBAR(u_kln, N_k)
    fe, fe_sigma = mbar.getFreeEnergyDifferences()
    # Differences relative to the first state.
    fe, fe_sigma = fe[0, 1:], fe_sigma[0, 1:]

    fe0 = test.analytical_free_energies()
    fe0 = fe0[1:] - fe0[0]

    # z-score of the estimate against the analytical values.
    z = (fe - fe0) / fe_sigma
    eq(z / z_scale_factor, np.zeros(len(z)), decimal=0)
def test_harmonic_oscillators_mbar_free_energies():
    """Harmonic Oscillators Test: can MBAR calculate correct free energy differences?"""
    test = harmonic_oscillators.HarmonicOscillatorsTestCase(O_k, k_k)
    x_n, u_kn, origin = test.sample(N_k)
    u_ijn, N_k_output = convert_ukn_to_uijn(u_kn)
    eq(N_k, N_k_output.values)

    mbar = MBAR(u_ijn.values, N_k)
    fe, fe_sigma = mbar.getFreeEnergyDifferences()
    # Keep the row of differences relative to the first state.
    fe, fe_sigma = fe[0], fe_sigma[0]

    fe0 = test.analytical_free_energies()
    z = (fe - fe0) / fe_sigma
    z = z[1:]  # First component is undetermined.
    eq(z / z_scale_factor, np.zeros(len(z)), decimal=0)
def test_exponential_mbar_free_energies():
    """Exponential Distribution Test: can MBAR calculate correct free energy differences?"""
    test = exponential_distributions.ExponentialTestCase(rates)
    x_n, u_kn, origin = test.sample(N_k)
    u_ijn, N_k_output = convert_ukn_to_uijn(u_kn)
    eq(N_k, N_k_output.values)

    mbar = MBAR(u_ijn.values, N_k)
    fe, fe_sigma = mbar.getFreeEnergyDifferences()
    # Keep the row of differences relative to the first state.
    fe, fe_sigma = fe[0], fe_sigma[0]

    fe0 = test.analytical_free_energies()
    z = (fe - fe0) / fe_sigma
    z = z[1:]  # First component is undetermined.
    eq(z / z_scale_factor, np.zeros(len(z)), decimal=0)
def test_mbar_free_energies():
    """Can MBAR calculate moderately correct free energy differences?"""
    for system_generator in system_generators:
        name, test = system_generator()
        x_n, u_kn, N_k_output, s_n = test.sample(N_k, mode='u_kn')
        eq(N_k, N_k_output)

        mbar = MBAR(u_kn, N_k)
        fe, fe_sigma, Theta_ij = mbar.getFreeEnergyDifferences()
        # Differences relative to the first state.
        fe, fe_sigma = fe[0, 1:], fe_sigma[0, 1:]

        fe0 = test.analytical_free_energies()
        fe0 = fe0[1:] - fe0[0]

        # z-score of the estimate against the analytical values.
        z = (fe - fe0) / fe_sigma
        eq(z / z_scale_factor, np.zeros(len(z)), decimal=0)
def test_mbar_free_energies():
    """Can MBAR calculate moderately correct free energy differences?"""
    for system_generator in system_generators:
        name, test = system_generator()
        x_n, u_kn, N_k_output, s_n = test.sample(N_k, mode='u_kn')
        eq(N_k, N_k_output)

        mbar = MBAR(u_kn, N_k)
        fe, fe_sigma, Theta_ij = mbar.getFreeEnergyDifferences()
        # Row 0 holds differences relative to the first state.
        fe, fe_sigma = fe[0, 1:], fe_sigma[0, 1:]

        analytical = test.analytical_free_energies()
        fe0 = analytical[1:] - analytical[0]

        # Normalized deviation from the analytical result.
        z = (fe - fe0) / fe_sigma
        eq(z / z_scale_factor, np.zeros(len(z)), decimal=0)
def gather_dg(self, u_kln, nstates):
    """Stack, subsample and MBAR-analyze u_kln; return the end-state free
    energy change and its uncertainty, both in kcal/mol."""
    u_kln = np.vstack(u_kln)

    # Subsample data to extract uncorrelated equilibrium timeseries.
    N_k = np.zeros([nstates], np.int32)  # uncorrelated samples per state
    for k in range(nstates):
        _, g, __ = timeseries.detectEquilibration(u_kln[k, k, :])
        uncorr = timeseries.subsampleCorrelatedData(u_kln[k, k, :], g=g)
        N_k[k] = len(uncorr)
        u_kln[k, :, 0:N_k[k]] = u_kln[k, :, uncorr].T

    # Compute free energy differences and statistical uncertainties.
    mbar = MBAR(u_kln, N_k)
    DeltaF_ij, dDeltaF_ij, _ = mbar.getFreeEnergyDifferences()

    logger.debug(
        "Number of uncorrelated samples per state: {}".format(N_k))
    logger.debug("Relative free energy change for {0} = {1} +- {2}".format(
        self.name, DeltaF_ij[0, nstates - 1] * self.kTtokcal,
        dDeltaF_ij[0, nstates - 1] * self.kTtokcal))

    return DeltaF_ij[0, nstates - 1] * self.kTtokcal, dDeltaF_ij[
        0, nstates - 1] * self.kTtokcal
def run_mbar(self, ndiscard=0, nuse=None):
    """Estimate free energies of all alchemical states.

    Parameters
    ----------
    ndiscard : int, optional, default=0
        number of iterations to discard to equilibration
    nuse : int, optional, default=None
        maximum number of iterations to use (after discarding)

    Returns
    -------
    Deltaf_ij : np.ndarray, shape=(n_states, n_states)
        The statewise free energy differences
    dDeltaf_ij : np.ndarray, shape=(n_states, n_states)
        The statewise free energy difference uncertainties
    """
    u_kln_replica, u_kln, u_n = self.get_u_kln()
    u_kln_replica, u_kln, u_n, N_k, N = self.equilibrate_and_subsample(
        u_kln_replica, u_kln, u_n, ndiscard=ndiscard, nuse=nuse)

    logger.info("Initializing MBAR and computing free energy differences...")
    # Use slow self-consistent-iteration (the default).
    mbar = MBAR(u_kln, N_k, verbose=False, method='self-consistent-iteration',
                maximum_iterations=50000)

    # Get matrix of dimensionless free energy differences and uncertainty estimate.
    logger.info("Computing covariance matrix...")
    (Deltaf_ij, dDeltaf_ij) = mbar.getFreeEnergyDifferences(uncertainty_method='svd-ew')

    logger.info("\n%-24s %16s\n%s" % ("Deltaf_ij", "current state",
                                      pd.DataFrame(Deltaf_ij).to_string()))
    # BUG FIX: this table holds the uncertainties, so label it dDeltaf_ij
    # (previously both tables were logged under the "Deltaf_ij" heading).
    logger.info("\n%-24s %16s\n%s" % ("dDeltaf_ij", "current state",
                                      pd.DataFrame(dDeltaf_ij).to_string()))

    return (Deltaf_ij, dDeltaf_ij)
# print 'E_%d evaluated in model_%d'%(k,l), u_kln[k,l,n] # Initialize MBAR with reduced energies u_kln and number of uncorrelated configurations from each state N_k. # # u_kln[k,l,n] is the reduced potential energy beta*U_l(x_kn), where U_l(x) is the potential energy function for state l, # beta is the inverse temperature, and and x_kn denotes uncorrelated configuration n from state k. # # N_k[k] is the number of configurations from state k stored in u_knm # # Note that this step may take some time, as the relative dimensionless free energies f_k are determined at this point. mbar = MBAR(u_kln, N_k) # Extract dimensionless free energy differences and their statistical uncertainties. #(Deltaf_ij, dDeltaf_ij) = mbar.getFreeEnergyDifferences() (Deltaf_ij, dDeltaf_ij, Theta_ij) = mbar.getFreeEnergyDifferences(uncertainty_method='approximate') #print 'Deltaf_ij', Deltaf_ij #print 'dDeltaf_ij', dDeltaf_ij beta = 1.0 # keep in units kT #print 'Unit-bearing (units kT) free energy difference f_1K = f_K - f_1: %f +- %f' % ( (1./beta) * Deltaf_ij[0,K-1], (1./beta) * dDeltaf_ij[0,K-1]) f_df = np.zeros( (nlambda, 2) ) # first column is Deltaf_ij[0,:], second column is dDeltaf_ij[0,:] f_df[:,0] = Deltaf_ij[0,:] f_df[:,1] = dDeltaf_ij[0,:] print 'Writing %s...'%args.bayesfactorfile savetxt(args.bayesfactorfile, f_df) print '...Done.' # Compute the expectation of some observable A(x) at each state i, and associated uncertainty matrix. # Here, A_kn[k,n] = A(x_{kn}) #(A_k, dA_k) = mbar.computeExpectations(A_kn)
def estimate_free_energies(ncfile, ndiscard=0, nuse=None, g=None):
    """Estimate free energies of all alchemical states.

    Parameters
    ----------
    ncfile : NetCDF
        Input YANK netcdf file
    ndiscard : int, optional, default=0
        Number of iterations to discard to equilibration
    nuse : int, optional, default=None
        Maximum number of iterations to use (after discarding)
    g : int, optional, default=None
        Statistical inefficiency to use if desired; if None, will be computed.

    TODO
    ----
    * Automatically determine 'ndiscard'.
    """
    # Get current dimensions.
    niterations = ncfile.variables['energies'].shape[0]
    nstates = ncfile.variables['energies'].shape[1]
    natoms = ncfile.variables['energies'].shape[2]

    # Extract energies.
    logger.info("Reading energies...")
    energies = ncfile.variables['energies']
    u_kln_replica = np.zeros([nstates, nstates, niterations], np.float64)
    for n in range(niterations):
        u_kln_replica[:, :, n] = energies[n, :, :]
    logger.info("Done.")

    # Deconvolute replicas: reindex from replica order to state order.
    logger.info("Deconvoluting replicas...")
    u_kln = np.zeros([nstates, nstates, niterations], np.float64)
    for iteration in range(niterations):
        state_indices = ncfile.variables['states'][iteration, :]
        u_kln[state_indices, :, iteration] = energies[iteration, :, :]
    logger.info("Done.")

    # Compute total negative log probability over all iterations.
    u_n = np.zeros([niterations], np.float64)
    for iteration in range(niterations):
        u_n[iteration] = np.sum(np.diagonal(u_kln[:, :, iteration]))

    # DEBUG: dump u_n to disk for inspection.
    outfile = open('u_n.out', 'w')
    for iteration in range(niterations):
        outfile.write("%8d %24.3f\n" % (iteration, u_n[iteration]))
    outfile.close()

    # Discard initial data to equilibration.
    u_kln_replica = u_kln_replica[:, :, ndiscard:]
    u_kln = u_kln[:, :, ndiscard:]
    u_n = u_n[ndiscard:]

    # Truncate to the number of specified conformations to use.
    if (nuse):
        u_kln_replica = u_kln_replica[:, :, 0:nuse]
        u_kln = u_kln[:, :, 0:nuse]
        u_n = u_n[0:nuse]

    # Subsample data to obtain uncorrelated samples.
    N_k = np.zeros(nstates, np.int32)
    indices = timeseries.subsampleCorrelatedData(u_n, g=g)  # indices of uncorrelated samples
    N = len(indices)  # number of uncorrelated samples
    N_k[:] = N
    u_kln[:, :, 0:N] = u_kln[:, :, indices]
    logger.info("number of uncorrelated samples:")
    logger.info(N_k)
    logger.info("")

    #===================================================================
    # Estimate free energy difference with MBAR.
    #===================================================================

    # Initialize MBAR (computing free energy estimates, which may take a while).
    logger.info("Computing free energy differences...")
    mbar = MBAR(u_kln, N_k)

    # Get matrix of dimensionless free energy differences and uncertainty estimate.
    logger.info("Computing covariance matrix...")
    try:
        # pymbar 2 returns two values
        (Deltaf_ij, dDeltaf_ij) = mbar.getFreeEnergyDifferences()
    except ValueError:
        # pymbar 3 returns three values
        (Deltaf_ij, dDeltaf_ij, theta_ij) = mbar.getFreeEnergyDifferences()

    # Matrix of free energy differences.
    logger.info("Deltaf_ij:")
    for i in range(nstates):
        str_row = ""
        for j in range(nstates):
            str_row += "%8.3f" % Deltaf_ij[i, j]
        logger.info(str_row)

    # Matrix of uncertainties in the free energy differences (expected
    # standard deviations of the estimator about the true free energy).
    logger.info("dDeltaf_ij:")
    for i in range(nstates):
        str_row = ""
        for j in range(nstates):
            str_row += "%8.3f" % dDeltaf_ij[i, j]
        logger.info(str_row)

    # Return free energy differences and an estimate of the covariance.
    return (Deltaf_ij, dDeltaf_ij)
def compute_hydration_energy(entry, parameters, hydration_factory_parameters, platform_name="Reference"):
    """
    Compute the hydration energy of a single molecule for a given GBSA
    parameter set by reweighting stored samples.

    ARGUMENTS

    entry (dict) - database entry with 'molecule' (OEMol with GBSA atom
        types), 'iupac', 'system' (OpenMM System), 'temperature',
        'x_n' (positions) and 'u_n' (reduced energies)
    parameters (dict) - parameters for GBSA atom types
    hydration_factory_parameters (dict) - must contain 'gbmodel'
    platform_name (str) - OpenMM platform to use (default "Reference")

    RETURNS

    energy (float) - hydration energy in kcal/mol

    RAISES

    ValueError - if gbmodel is not None or in 0..4
    """
    platform = openmm.Platform.getPlatformByName(platform_name)

    from pymbar import MBAR

    gbmodel = hydration_factory_parameters['gbmodel'].value
    molecule = entry['molecule']
    iupac_name = entry['iupac']
    cid = molecule.GetData('cid')

    # Retrieve OpenMM System; the solvent copy gets a GBSA force added.
    vacuum_system = entry['system']
    solvent_system = copy.deepcopy(entry['system'])

    # Get nonbonded force.
    forces = {solvent_system.getForce(index).__class__.__name__: solvent_system.getForce(index)
              for index in range(solvent_system.getNumForces())}
    nonbonded_force = forces['NonbondedForce']

    # Add GBSA force.
    from simtk.openmm.app.internal import customgbforces
    if gbmodel is None:
        gbsa_force = openmm.GBSAOBCForce()
        gbsa_force.setNonbondedMethod(openmm.GBSAOBCForce.NoCutoff)  # set no cutoff
        gbsa_force.setSoluteDielectric(1)
        gbsa_force.setSolventDielectric(78)
    elif gbmodel == 0:
        gbsa_force = customgbforces.GBSAHCTForce(SA='ACE')
    elif gbmodel == 1:
        gbsa_force = customgbforces.GBSAOBC1Force(SA='ACE')
    elif gbmodel == 2:
        gbsa_force = customgbforces.GBSAOBC2Force(SA='ACE')
    elif gbmodel == 3:
        gbsa_force = customgbforces.GBSAGBnForce(SA='ACE')
    elif gbmodel == 4:
        gbsa_force = customgbforces.GBSAGBn2Force(SA='ACE')
    else:
        # BUG FIX: the original only printed a warning here and then crashed
        # later with an undefined 'gbsa_force'; fail fast instead.
        raise ValueError("GBmodel %i out of range" % gbmodel)

    # Build indexable list of atoms.
    atoms = [atom for atom in molecule.GetAtoms()]
    natoms = len(atoms)

    # Assign GBSA parameters.
    for (atom_index, atom) in enumerate(atoms):
        [charge, sigma, epsilon] = nonbonded_force.getParticleParameters(atom_index)
        atomtype = atom.GetStringData("gbsa_type")  # GBSA atomtype
        radius = parameters['%s_%s' % (atomtype, 'radius')] * units.angstroms
        scalingFactor = parameters['%s_%s' % (atomtype, 'scalingFactor')]
        # GBSAOBCForce takes positional args; the custom GB forces take a list.
        if gbmodel is None:
            gbsa_force.addParticle(charge, radius, scalingFactor)
        else:
            gbsa_force.addParticle([charge, radius, scalingFactor])

    # Add the force to the system.
    solvent_system.addForce(gbsa_force)

    # Create context for solvent system.
    timestep = 2.0 * units.femtosecond
    solvent_integrator = openmm.VerletIntegrator(timestep)
    solvent_context = openmm.Context(solvent_system, solvent_integrator, platform)

    # Create context for vacuum system.
    vacuum_integrator = openmm.VerletIntegrator(timestep)
    vacuum_context = openmm.Context(vacuum_system, vacuum_integrator, platform)

    # Compute energy differences by re-evaluating stored samples.
    temperature = entry['temperature']
    kT = kB * temperature
    beta = 1.0 / kT

    initial_time = time.time()
    x_n = entry['x_n']
    u_n = entry['u_n']
    nsamples = len(u_n)
    nstates = 3  # number of thermodynamic states
    u_kln = np.zeros([3, 3, nsamples], np.float64)
    for sample in range(nsamples):
        positions = units.Quantity(x_n[sample, :, :], units.nanometers)
        u_kln[0, 0, sample] = u_n[sample]
        vacuum_context.setPositions(positions)
        vacuum_state = vacuum_context.getState(getEnergy=True)
        u_kln[0, 1, sample] = beta * vacuum_state.getPotentialEnergy()
        solvent_context.setPositions(positions)
        solvent_state = solvent_context.getState(getEnergy=True)
        u_kln[0, 2, sample] = beta * solvent_state.getPotentialEnergy()
    N_k = np.zeros([nstates], np.int32)
    N_k[0] = nsamples
    mbar = MBAR(u_kln, N_k)
    try:
        df_ij, ddf_ij, _ = mbar.getFreeEnergyDifferences()
    except linalg.LinAlgError:
        return np.inf
    DeltaG_in_kT = df_ij[1, 2]
    dDeltaG_in_kT = ddf_ij[1, 2]

    final_time = time.time()
    elapsed_time = final_time - initial_time
    #print "%48s | %48s | reweighting took %.3f s" % (cid, iupac_name, elapsed_time)

    # Clean up.
    del solvent_context, solvent_integrator
    del vacuum_context, vacuum_integrator

    energy = kT * DeltaG_in_kT
    # NOTE(review): the %d conversion raises if gbmodel is None — the original
    # print had the same issue; confirm callers never pass gbmodel=None here.
    print("%48s | %48s | DeltaG = %.3f +- %.3f kT | gbmodel = %d"
          % (cid, iupac_name, DeltaG_in_kT, dDeltaG_in_kT, gbmodel))

    return energy / units.kilocalories_per_mole
# Generate independent data samples from K one-dimensional harmonic
# oscillators centered at q = 0.
#=============================================================================
randomsample = testsystems.harmonic_oscillators.HarmonicOscillatorsTestCase(O_k=O_k, K_k=K_k, beta=beta)
[x_kn, u_kln, N_k] = randomsample.sample(N_k, mode='u_kln')

# Recover the unreduced energies.
U_kln = u_kln / beta

#=============================================================================
# Estimate free energies and expectations.
#=============================================================================

# Initialize the MBAR class, determining the free energies
# (fast Newton-Raphson solver).
mbar = MBAR(u_kln, N_k, relative_tolerance=1.0e-10, verbose=False)
results = mbar.getFreeEnergyDifferences()
Deltaf_ij_estimated = results['Delta_f']
dDeltaf_ij_estimated = results['dDelta_f']

# Compute error from analytical free energy differences.
Deltaf_ij_error = Deltaf_ij_estimated - Deltaf_ij_analytical

# Estimate the expectation of the mean-squared displacement at each condition.
if observe == 'RMS displacement':
    A_kn = numpy.zeros([K, K, N_max], dtype=numpy.float64)
    for k in range(0, K):
        for l in range(0, K):
            # Observable is the squared displacement.
            A_kn[k, l, 0:N_k[k]] = (x_kn[k, 0:N_k[k]] - O_k[l])**2

# (For the potential-energy observable, A would instead be a 3D array since
# the potential energy is a function of thermodynamic state.)
def MBAR_analysis(self, debug = False): """MBAR analysis for populations and BICePs score""" # load necessary data first self.load_data() # Suppose the energies sampled from each simulation are u_kln, where u_kln[k,l,n] is the reduced potential energy # of snapshot n \in 1,...,N_k of simulation k \in 1,...,K evaluated at reduced potential for state l. self.K = self.nlambda # number of thermodynamic ensembles # N_k[k] will denote the number of correlated snapshots from state k N_k = np.array( [len(self.traj[i]['trajectory']) for i in range(self.nlambda)] ) nsnaps = N_k.max() u_kln = np.zeros( (self.K, self.K, nsnaps) ) nstates = int(self.states) print 'nstates', nstates states_kn = np.zeros( (self.K, nsnaps) ) # Get snapshot energies rescored in the different ensembles """['step', 'E', 'accept', 'state', 'sigma_noe', 'sigma_J', 'sigma_cs', 'sigma_pf''gamma'] [int(step), float(self.E), int(accept), int(self.state), int(self.sigma_noe_index), int(self.sigma_J_index), int(self.sigma_cs_H_index), int(self.sigma_cs_Ha_index), int(self.sigma_cs_N_index), int(self.sigma_cs_Ca_index), int(self.sigma_pf_index), int(self.gamma_index)] """ for n in range(nsnaps): for k in range(self.K): for l in range(self.K): if debug: print 'step', self.traj[k]['trajectory'][n][0], if k==l: print 'E%d evaluated in model %d'%(k,k), self.traj[k]['trajectory'][n][1], u_kln[k,k,n] = self.traj[k]['trajectory'][n][1] state, sigma_noe_index, sigma_J_index, sigma_cs_H_index, sigma_cs_Ha_index, sigma_cs_N_index, sigma_cs_Ca_index, sigma_pf_index, gamma_index = self.traj[k]['trajectory'][n][3:] # IMPORTANT: make sure the order of these parameters is the same as the way they are saved in PosteriorSampler print 'state, sigma_noe_index, sigma_J_index, sigma_cs_H_index, sigma_cs_Ha_index, sigma_cs_N_index, sigma_cs_Ca_index, sigma_pf_index, gamma_index', state, sigma_noe_index, sigma_J_index, sigma_cs_H_index, sigma_cs_Ha_index, sigma_cs_N_index, sigma_cs_Ca_index, sigma_pf_index, gamma_index 
states_kn[k,n] = state sigma_noe = self.traj[k]['allowed_sigma_noe'][sigma_noe_index] sigma_J = self.traj[k]['allowed_sigma_J'][sigma_J_index] sigma_cs_H = self.traj[k]['allowed_sigma_cs_H'][sigma_cs_H_index] sigma_cs_Ha = self.traj[k]['allowed_sigma_cs_Ha'][sigma_cs_Ha_index] sigma_cs_N = self.traj[k]['allowed_sigma_cs_N'][sigma_cs_N_index] sigma_cs_Ca = self.traj[k]['allowed_sigma_cs_Ca'][sigma_cs_Ca_index] sigma_pf = self.traj[k]['allowed_sigma_pf'][sigma_pf_index] u_kln[k,l,n] = self.sampler[l].neglogP(0, state, sigma_noe, sigma_J, sigma_cs_H, sigma_cs_Ha, sigma_cs_N, sigma_cs_Ca, sigma_pf, gamma_index) if debug: print 'E_%d evaluated in model_%d'%(k,l), u_kln[k,l,n] # Initialize MBAR with reduced energies u_kln and number of uncorrelated configurations from each state N_k. # u_kln[k,l,n] is the reduced potential energy beta*U_l(x_kn), where U_l(x) is the potential energy function for state l, # beta is the inverse temperature, and and x_kn denotes uncorrelated configuration n from state k. # N_k[k] is the number of configurations from state k stored in u_knm # Note that this step may take some time, as the relative dimensionless free energies f_k are determined at this point. mbar = MBAR(u_kln, N_k) # Extract dimensionless free energy differences and their statistical uncertainties. 
# (Deltaf_ij, dDeltaf_ij) = mbar.getFreeEnergyDifferences() #(Deltaf_ij, dDeltaf_ij, Theta_ij) = mbar.getFreeEnergyDifferences(uncertainty_method='svd-ew') (Deltaf_ij, dDeltaf_ij, Theta_ij) = mbar.getFreeEnergyDifferences(uncertainty_method='approximate') #print 'Deltaf_ij', Deltaf_ij #print 'dDeltaf_ij', dDeltaf_ij beta = 1.0 # keep in units kT #print 'Unit-bearing (units kT) free energy difference f_1K = f_K - f_1: %f +- %f' % ( (1./beta) * Deltaf_ij[0,K-1], (1./beta) * dDeltaf_ij[0,K-1]) self.f_df = np.zeros( (self.nlambda, 2) ) # first column is Deltaf_ij[0,:], second column is dDeltaf_ij[0,:] self.f_df[:,0] = Deltaf_ij[0,:] self.f_df[:,1] = dDeltaf_ij[0,:] # Compute the expectation of some observable A(x) at each state i, and associated uncertainty matrix. # Here, A_kn[k,n] = A(x_{kn}) #(A_k, dA_k) = mbar.computeExpectations(A_kn) self.P_dP = np.zeros( (nstates, 2*self.K) ) # left columns are P, right columns are dP if debug: print 'state\tP\tdP' for i in range(nstates): A_kn = np.where(states_kn==i,1,0) (p_i, dp_i) = mbar.computeExpectations(A_kn, uncertainty_method='approximate') self.P_dP[i,0:self.K] = p_i self.P_dP[i,self.K:2*self.K] = dp_i print i for p in p_i: print p, for dp in dp_i: print dp, print pops, dpops = self.P_dP[:,0:self.K], self.P_dP[:,self.K:2*self.K] # save results self.save_MBAR()
def SimulateAlchemy(path, niter, nsteps_per_iter, nlambda):
    """Calculates the binding free energy of a ligand named 'UNL' using alchemy.

    One step corresponds to two femtoseconds.

    Parameters
    ----------
    path : str
        Directory containing the Amber 'com.prmtop' / 'com.inpcrd' files for the complex.
    niter : int
        Number of sampling iterations per lambda state.
    nsteps_per_iter : int
        MD steps run between consecutive energy evaluations (1 step = 2 fs).
    nlambda : int
        Number of alchemical states spanning lambda = 1.0 -> 0.0.

    Returns
    -------
    (float, float)
        MBAR estimate of the dimensionless free energy difference between the
        first and last lambda states (in kT), and its statistical uncertainty.
    """
    prmtop = app.AmberPrmtopFile(f'{path}/com.prmtop')
    inpcrd = app.AmberInpcrdFile(f'{path}/com.inpcrd')
    system = prmtop.createSystem(implicitSolvent=app.GBn2,
                                 nonbondedMethod=app.CutoffNonPeriodic,
                                 nonbondedCutoff=1.0 * unit.nanometers,
                                 constraints=app.HBonds,
                                 rigidWater=True,
                                 ewaldErrorTolerance=0.0005)
    # Detect ligand indices
    ligand_ind = []
    for atm in prmtop.topology.atoms():
        # OpenEye make the ligand name 'UNL'
        if atm.residue.name == 'UNL':
            ligand_ind.append(atm.index)
    ligand_ind = set(ligand_ind)
    # Installs the alchemical (lambda-scaled) forces acting on the ligand atoms.
    AddAlchemyForces(system, ligand_ind)
    integrator = mm.LangevinIntegrator(300 * unit.kelvin, 1.0 / unit.picoseconds,
                                       2.0 * unit.femtoseconds)
    integrator.setConstraintTolerance(0.00001)
    # TODO: The issues here are the same as the mmgbsa.py script
    # TODO: This should just recognize whatever the computer is capable of, not force CUDA.
    # TODO: I am not sure if mixed precision is necessary. Just need to be consistent
    platform = mm.Platform.getPlatformByName('CUDA')
    # NOTE(review): 'properties' is built but never passed to app.Simulation below,
    # so the mixed-precision request has no effect — confirm and wire through.
    properties = {'CudaPrecision': 'mixed'}
    simulation = app.Simulation(prmtop.topology, system, integrator, platform)
    simulation.context.setPositions(inpcrd.positions)
    simulation.minimizeEnergy()
    ### Now simulate system
    import numpy as np
    from pymbar import MBAR, timeseries
    lambdas = np.linspace(1.0, 0.0, nlambda)
    # Save the potential energies for MBAR
    # u_kln[k, l, i]: reduced energy of the configuration sampled at state k,
    # iteration i, re-evaluated under state l's Hamiltonian.
    u_kln = np.zeros([nlambda, nlambda, niter])
    kT = unit.AVOGADRO_CONSTANT_NA * unit.BOLTZMANN_CONSTANT_kB * integrator.getTemperature()
    # TODO: This runs in series. Someone comfortable with MPI should help parallelize this.
    for k in range(nlambda):
        for i in range(niter):
            print('state %5d iteration %5d / %5d' % (k, i, niter))
            # Restore this state's lambda before propagating dynamics (the inner
            # energy loop below leaves lambda at the last state).
            simulation.context.setParameter('lambda', lambdas[k])
            integrator.step(nsteps_per_iter)
            for l in range(nlambda):
                simulation.context.setParameter('lambda', lambdas[l])
                u_kln[k, l, i] = simulation.context.getState(
                    getEnergy=True).getPotentialEnergy() / kT
    # Subsample to reduce variation
    N_k = np.zeros([nlambda], np.int32)  # number of uncorrelated samples
    for k in range(nlambda):
        [t0, g, Neff_max] = timeseries.detectEquilibration(u_kln[k, k, :])
        # TODO: maybe should use 't0:' instead of ':' in third index
        indices = timeseries.subsampleCorrelatedData(u_kln[k, k, :], g=g)
        N_k[k] = len(indices)
        # Fancy indexing on the last axis returns shape (N_k[k], nlambda); transpose back.
        u_kln[k, :, 0:N_k[k]] = u_kln[k, :, indices].T
    # Calculate the energy difference
    # TODO: I've never worked with pymbar beyond the timeseries function. I'm not sure how the error in DeltaF is calculated, and I don't know what Theta is right now.
    mbar = MBAR(u_kln, N_k)
    [DeltaF_ij, dDeltaF_ij, Theta_ij] = mbar.getFreeEnergyDifferences()
    return DeltaF_ij[0][-1], dDeltaF_ij[0][-1]
weights = mbar.getWeights() # Store the weights for plot comparisons later on for sample in range(0, len(weights)): for state in range(0, num_states): # print(weights_for_each_num_states[num_states_index][state][sample]) weights_for_each_num_states[num_states_index][state][ sample] = weights[sample][state] ############# # # 5) Calculate dimensionless free energies with MBAR # ############# # Get the dimensionless free energy differences free_energies, uncertainty_free_energies = mbar.getFreeEnergyDifferences( )[0], mbar.getFreeEnergyDifferences()[1] # print("With "+str(num_states)+" states the free energies are:") # Save the free energies for this number of states for comparison plots later on for sample in range(0, len(free_energies)): for state in range(0, num_states): free_energies_for_each_num_states[num_states_index][sample][ state] = free_energies[sample][state] state_temps_for_each_num_states[num_states_index][ state] = T_state_center[state] state_energies[ state] = state_energies[state] + free_energies[sample][state] # Calculate the averate total energy for the samples within each state (temperature window) for state in range(0, num_states): state_energies_for_each_num_states[num_states_index][ state] = state_energies[state] / len(free_energies)
def estimate_free_energies(ncfile, ndiscard = 0, nuse = None):
    """Estimate free energies of all alchemical states.

    ARGUMENTS
      ncfile (NetCDF) - input YANK netcdf file

    OPTIONAL ARGUMENTS
      ndiscard (int) - number of iterations to discard to equilibration
      nuse (int) - maximum number of iterations to use (after discarding)

    RETURNS
      (Deltaf_ij, dDeltaf_ij) - matrices of dimensionless free energy
      differences between all pairs of states and their uncertainties.

    TODO: Automatically determine 'ndiscard'.
    """
    # Get current dimensions.
    niterations = ncfile.variables['energies'].shape[0]
    nstates = ncfile.variables['energies'].shape[1]
    natoms = ncfile.variables['energies'].shape[2]
    # Extract energies.
    print "Reading energies..."
    energies = ncfile.variables['energies']
    # u_kln_replica[replica, state, iteration]: reduced energies indexed by replica.
    u_kln_replica = zeros([nstates, nstates, niterations], float64)
    for n in range(niterations):
        u_kln_replica[:,:,n] = energies[n,:,:]
    print "Done."
    # Deconvolute replicas: re-index energies by the thermodynamic state each
    # replica occupied at that iteration, giving u_kln[state, state, iteration].
    print "Deconvoluting replicas..."
    u_kln = zeros([nstates, nstates, niterations], float64)
    for iteration in range(niterations):
        state_indices = ncfile.variables['states'][iteration,:]
        u_kln[state_indices,:,iteration] = energies[iteration,:,:]
    print "Done."
    # Compute total negative log probability over all iterations.
    u_n = zeros([niterations], float64)
    for iteration in range(niterations):
        u_n[iteration] = sum(diagonal(u_kln[:,:,iteration]))
    #print u_n  # DEBUG
    # Dump the timeseries for offline inspection.
    outfile = open('u_n.out', 'w')
    for iteration in range(niterations):
        outfile.write("%8d %24.3f\n" % (iteration, u_n[iteration]))
    outfile.close()
    # Discard initial data to equilibration.
    u_kln_replica = u_kln_replica[:,:,ndiscard:]
    u_kln = u_kln[:,:,ndiscard:]
    u_n = u_n[ndiscard:]
    # Truncate to number of specified conformations to use
    if (nuse):
        u_kln_replica = u_kln_replica[:,:,0:nuse]
        u_kln = u_kln[:,:,0:nuse]
        u_n = u_n[0:nuse]
    # Subsample data to obtain uncorrelated samples, using the total negative
    # log probability u_n as the decorrelation observable.
    N_k = zeros(nstates, int32)
    indices = timeseries.subsampleCorrelatedData(u_n) # indices of uncorrelated samples
    #indices = range(0,u_n.size) # DEBUG - assume samples are uncorrelated
    N = len(indices) # number of uncorrelated samples
    N_k[:] = N
    u_kln[:,:,0:N] = u_kln[:,:,indices]
    print "number of uncorrelated samples:"
    print N_k
    print ""
    #===================================================================================================
    # Estimate free energy difference with MBAR.
    #===================================================================================================
    # Initialize MBAR (computing free energy estimates, which may take a while)
    print "Computing free energy differences..."
    mbar = MBAR(u_kln, N_k, verbose = False, method = 'adaptive', maximum_iterations = 50000) # use slow self-consistent-iteration (the default)
    #mbar = MBAR(u_kln, N_k, verbose = False, method = 'self-consistent-iteration', maximum_iterations = 50000) # use slow self-consistent-iteration (the default)
    #mbar = MBAR(u_kln, N_k, verbose = True, method = 'Newton-Raphson') # use faster Newton-Raphson solver
    # Get matrix of dimensionless free energy differences and uncertainty estimate.
    print "Computing covariance matrix..."
    (Deltaf_ij, dDeltaf_ij) = mbar.getFreeEnergyDifferences(uncertainty_method='svd-ew')
    # Matrix of free energy differences
    print "Deltaf_ij:"
    for i in range(nstates):
        for j in range(nstates):
            print "%8.3f" % Deltaf_ij[i,j],
        print ""
    # print Deltaf_ij
    # Matrix of uncertainties in free energy difference (expectations standard deviations of the estimator about the true free energy)
    print "dDeltaf_ij:"
    for i in range(nstates):
        for j in range(nstates):
            print "%8.3f" % dDeltaf_ij[i,j],
        print ""
    # Return free energy differences and an estimate of the covariance.
    return (Deltaf_ij, dDeltaf_ij)
# get uncorrelated samples print("=== Getting uncorrelated samples===") N_k = np.zeros([nstates], np.int32) # number of uncorrelated samples for k in range(nstates): [nequil, g, Neff_max] = timeseries.detectEquilibration(u_kln[k, k, :]) indices = timeseries.subsampleCorrelatedData(u_kln[k, k, :], g=g) N_k[k] = len(indices) u_kln[k, :, 0:N_k[k]] = u_kln[k, :, indices].T print("...found {} uncorrelated samples...".format(N_k)) np.save('{}_ukln'.format(args.outprefix), u_kln) # Compute free energy differences and statistical uncertainties print("=== Computing free energy differences ===") mbar = MBAR(u_kln, N_k) [DeltaF_ij, dDeltaF_ij, Theta_ij] = mbar.getFreeEnergyDifferences() np.savetxt('{}_DeltaF.dat'.format(args.outprefix), DeltaF_ij) np.savetxt('{}_dDeltaF.dat'.format(args.outprefix), dDeltaF_ij) # Print out one line summary #tension = DeltaF_ij[0,1]/2/da * 1e18 #tensionError = dDeltaF_ij[0,1]/2/da * 1e18 tension = DeltaF_ij[ 0, 1] / da * 1e18 * kT #(in J/m^2). note da already has a factor of two for the two areas! tensionError = dDeltaF_ij[0, 1] / da * 1e18 * kT print('tension (pymbar): {} +/- {}N/m'.format(tension, tensionError)) with open('{}_results.txt'.format(args.outprefix), "a") as f: f.write('\nUsing pymbar:\n')
mbar_same_total_samples = MBAR(u_kn_same_total_samples, state_counts_same_total_samples, verbose=False, relative_tolerance=1e-12) # Get the 'weights', or reweighted mixture distribution weights = mbar.getWeights() weights_same_total_samples = mbar_same_total_samples.getWeights() # Store the weights for later analysis weights_for_each_num_states.extend([weights]) weights_for_each_num_states_same_total_samples.extend([weights_same_total_samples]) ############# # # 6) Calculate dimensionless free energies with MBAR # ############# # Get the dimensionless free energy differences, and uncertainties in their values free_energies,uncertainty_free_energies = mbar.getFreeEnergyDifferences()[0],mbar.getFreeEnergyDifferences()[1] # Save the free energies free_energies_for_each_num_states.extend([free_energies]) # Save the uncertainty in the free energy uncertainties_for_each_num_states.extend([uncertainty_free_energies]) # Get the dimensionless free energy differences and uncertainties for the uniform sampling approach free_energies,uncertainty_free_energies = mbar_same_total_samples.getFreeEnergyDifferences()[0],mbar_same_total_samples.getFreeEnergyDifferences()[1] # Save the data free_energies_for_each_num_states_same_total_samples.extend([free_energies]) uncertainties_for_each_num_states_same_total_samples.extend([uncertainty_free_energies]) ############# # # 7) Calculate < R_Na-Cl > with MBAR # #############
nstates, m, k = np.shape(u_kln) l = np.linspace(0, 1, nstates) # Subsample data to extract uncorrelated equilibrium timeseries N_k = np.zeros([nstates], np.int32) # number of uncorrelated samples for k in range(nstates): [nequil, g, Neff_max] = timeseries.detectEquilibration(u_kln[k, k, :]) indices = timeseries.subsampleCorrelatedData(u_kln[k, k, :], g=g) N_k[k] = len(indices) u_kln[k, :, 0:N_k[k]] = u_kln[k, :, indices].T # Compute free energy differences and statistical uncertainties mbar = MBAR(u_kln, N_k) [DeltaF_ij, dDeltaF_ij, Theta_ij] = mbar.getFreeEnergyDifferences(return_theta=True) #results = mbar.getFreeEnergyDifferences(return_dict=True,return_theta=True) #DeltaF_ij = results['Delta_f'] #dDeltaF_ij = results['dDelta_f'] #Theta_ij = results['Theta'] ODeltaF_ij = mbar.computeOverlap()['matrix'] # Print results f = open(Savename, 'w') for i in range(nstates): f.writelines("%.2f: %9.4f +- %.4f\n" % (l[i], DeltaF_ij[i, 0] * kT, dDeltaF_ij[i, 0] * kT)) f.close() # Plot Overlap fig1, ax1 = plt.subplots()
print 'E_%d evaluated in model_%d'%(k,l), u_kln[k,l,n] # Initialize MBAR with reduced energies u_kln and number of uncorrelated configurations from each state N_k. # # u_kln[k,l,n] is the reduced potential energy beta*U_l(x_kn), where U_l(x) is the potential energy function for state l, # beta is the inverse temperature, and and x_kn denotes uncorrelated configuration n from state k. # # N_k[k] is the number of configurations from state k stored in u_knm # # Note that this step may take some time, as the relative dimensionless free energies f_k are determined at this point. mbar = MBAR(u_kln, N_k) # Extract dimensionless free energy differences and their statistical uncertainties. (Deltaf_ij, dDeltaf_ij) = mbar.getFreeEnergyDifferences() print 'Deltaf_ij', Deltaf_ij print 'dDeltaf_ij', dDeltaf_ij beta = 1.0 # keep in units kT print 'Unit-bearing (units kT) free energy difference f_1K = f_K - f_1: %f +- %f' % ( (1./beta) * Deltaf_ij[0,K-1], (1./beta) * dDeltaf_ij[0,K-1]) f_df = np.zeros( (nlambda, 2) ) # first column is Deltaf_ij[0,:], second column is dDeltaf_ij[0,:] f_df[:,0] = Deltaf_ij[0,:] f_df[:,1] = dDeltaf_ij[0,:] print 'Writing %s...'%args.bayesfactorfile savetxt(args.bayesfactorfile, f_df) print '...Done.' # Compute the expectation of some observable A(x) at each state i, and associated uncertainty matrix. # Here, A_kn[k,n] = A(x_{kn}) #(A_k, dA_k) = mbar.computeExpectations(A_kn)
kT = unit.AVOGADRO_CONSTANT_NA * unit.BOLTZMANN_CONSTANT_kB * integrator.getTemperature() for k in range(nstates): for iteration in range(niterations): print('state %5d iteration %5d / %5d' % (k, iteration, niterations)) # Set alchemical state context.setParameter('lambda', lambdas[k]) # Run some dynamics integrator.step(nsteps) # Compute energies at all alchemical states for l in range(nstates): context.setParameter('lambda', lambdas[l]) u_kln[k,l,iteration] = context.getState(getEnergy=True).getPotentialEnergy() / kT # Estimate free energy of Lennard-Jones particle insertion from pymbar import MBAR, timeseries # Subsample data to extract uncorrelated equilibrium timeseries N_k = np.zeros([nstates], np.int32) # number of uncorrelated samples for k in range(nstates): [nequil, g, Neff_max] = timeseries.detectEquilibration(u_kln[k,k,:]) indices = timeseries.subsampleCorrelatedData(u_kln[k,k,:], g=g) N_k[k] = len(indices) u_kln[k,:,0:N_k[k]] = u_kln[k,:,indices].T # Compute free energy differences and statistical uncertainties mbar = MBAR(u_kln, N_k) [DeltaF_ij, dDeltaF_ij, Theta_ij] = mbar.getFreeEnergyDifferences() print('DeltaF_ij (kT):') print(DeltaF_ij) print('dDeltaF_ij (kT):') print(dDeltaF_ij)
def estimate_free_energies(ncfile, ndiscard=0, nuse=None, g=1.0, replicas=None):
    """Estimate free energies of all alchemical states.

    ARGUMENTS
      ncfile (NetCDF) - input YANK netcdf file

    OPTIONAL ARGUMENTS
      ndiscard (int) - number of iterations to discard to equilibration (default: 0)
      nuse (int) - maximum number of iterations to use (after discarding) (default: None)
      g (float) - statistical inefficiency to use for subsampling (default: 1.0)
      replicas (list of int) - if specified, only use these replicas for estimating the free energies (default: None)

    RETURNS
      (Deltaf_ij, dDeltaf_ij) - matrices of dimensionless free energy
      differences between all pairs of states and their uncertainties.

    TODO: Automatically determine 'ndiscard'.
    """
    # Get current dimensions.
    niterations = ncfile.variables['energies'].shape[0]
    nstates = ncfile.variables['energies'].shape[1]
    natoms = ncfile.variables['energies'].shape[2]

    # Extract energies: u_kln_replica[replica, state, iteration].
    energies = ncfile.variables['energies']
    u_kln_replica = numpy.zeros([nstates, nstates, niterations], numpy.float64)
    for n in range(niterations):
        u_kln_replica[:,:,n] = energies[n,:,:]

    # Extract the thermodynamic state each replica occupied at each iteration.
    states_kn_replica = numpy.zeros([nstates, niterations], numpy.int32)
    for n in range(niterations):
        states_kn_replica[:,n] = ncfile.variables['states'][n,:]

    # Discard initial data to equilibration.
    u_kln_replica = u_kln_replica[:,:,ndiscard:]
    states_kn_replica = states_kn_replica[:,ndiscard:]

    # If specified, truncate to number of specified conformations to use.
    if (nuse):
        u_kln_replica = u_kln_replica[:,:,0:nuse]
        states_kn_replica = states_kn_replica[:,0:nuse]

    # Subsample data to obtain uncorrelated samples, using the first replica's
    # self-energy as the decorrelation observable with the supplied inefficiency g.
    A_n = u_kln_replica[0,0,:]
    indices = timeseries.subsampleCorrelatedData(A_n, g=g) # indices of uncorrelated samples
    N = len(indices) # number of uncorrelated samples
    u_kln_replica[:,:,0:N] = u_kln_replica[:,:,indices]
    states_kn_replica[:,0:N] = states_kn_replica[:,indices]

    # Deconvolute replicas to obtain energies by state.
    u_kln = numpy.zeros([nstates, nstates, N], numpy.float64)
    if replicas is None:
        # Use all replicas: each state gets exactly one sample per kept iteration.
        N_k = N * numpy.ones(nstates, numpy.int32)
        for n in range(N):
            state_indices = states_kn_replica[:,n]
            u_kln[state_indices,:,n] = u_kln_replica[:,:,n]
    else:
        # Use only specified replicas; each state accumulates however many
        # samples those replicas contributed to it.
        # FIX: removed the dead assignment 'state_indices = ncfile.variables["states"][n,:]'
        # which read the raw (un-discarded, un-subsampled) netcdf data and was never used.
        N_k = numpy.zeros(nstates, numpy.int32)
        for n in range(N):
            for replica in replicas:
                state_index = states_kn_replica[replica,n]
                u_kln[state_index,:,N_k[state_index]] = u_kln_replica[replica,:,n]
                N_k[state_index] += 1

    #===================================================================================================
    # Estimate free energy difference with MBAR.
    #===================================================================================================

    # Initialize MBAR (computing free energy estimates, which may take a while)
    mbar = MBAR(u_kln, N_k, verbose = False, maximum_iterations = 50000) # use slow self-consistent-iteration (the default)

    # Get matrix of dimensionless free energy differences and uncertainty estimate.
    (Deltaf_ij, dDeltaf_ij) = mbar.getFreeEnergyDifferences(uncertainty_method='svd-ew')

    # Return free energy differences and an estimate of the covariance.
    return (Deltaf_ij, dDeltaf_ij)
def free_energy_trace(self, discard_from_start=1, n_trace=10):
    """
    Trace the free energy by keeping fewer and fewer samples in both forward and reverse direction

    Parameters
    ----------
    discard_from_start : int
        Number of initial samples to drop before tracing (sample 0 is the
        minimized structure, not an equilibrium sample).
    n_trace : int
        Number of trace points; point j uses (j+1)/n_trace of the samples.

    Returns
    -------
    free_energy_trace_figure : matplotlib.figure
        Figure showing the equilibration between both phases
    """
    trace_spacing = 1.0/n_trace

    # Helper: render one phase's forward/reverse traces onto a single Axes,
    # with 2-sigma error bars and a band marking the full-data forward estimate.
    def format_trace_plot(plot: plt.Axes, trace_forward: np.ndarray, trace_reverse: np.ndarray):
        x = np.arange(n_trace + 1)[1:] * trace_spacing * 100
        plot.errorbar(x, trace_forward[:, 0], yerr=2 * trace_forward[:, 1],
                      ecolor='b', elinewidth=0, mec='none', mew=0, linestyle='None',
                      zorder=10)
        plot.plot(x, trace_forward[:, 0], 'b-', marker='o', mec='b', mfc='w',
                  label='Forward', zorder=20,)
        plot.errorbar(x, trace_reverse[:, 0], yerr=2 * trace_reverse[:, 1],
                      ecolor='r', elinewidth=0, mec='none', mew=0, linestyle='None',
                      zorder=10)
        plot.plot(x, trace_reverse[:, 0], 'r-', marker='o', mec='r', mfc='w',
                  label='Reverse', zorder=20)
        y_fill_upper = [trace_forward[-1, 0] + 2 * trace_forward[-1, 1]] * 2
        y_fill_lower = [trace_forward[-1, 0] - 2 * trace_forward[-1, 1]] * 2
        xlim = [0, 100]
        plot.fill_between(xlim, y_fill_lower, y_fill_upper, color='orchid', zorder=5)
        plot.set_xlim(xlim)
        plot.legend()
        plot.set_xlabel("% Samples Analyzed", fontsize=20)
        plot.set_ylabel(r"$\Delta G$ in kcal/mol", fontsize=20)

    # Adjust figure size
    plt.rcParams['figure.figsize'] = 15, 6 * (self.nphases + 1) * 2
    plot_grid = gridspec.GridSpec(self.nphases + 1, 1)  # Vertical distribution
    free_energy_trace_figure = plt.figure()
    # Add some space between the figures
    free_energy_trace_figure.subplots_adjust(hspace=0.4)
    traces = {}
    for i, phase_name in enumerate(self.phase_names):
        traces[phase_name] = {}
        # Ensure the serialization dict has a free_energy slot for this phase.
        if phase_name not in self._serialized_data:
            self._serialized_data[phase_name] = {}
        serial = self._serialized_data[phase_name]
        if "free_energy" not in serial:
            serial["free_energy"] = {}
        serial = serial["free_energy"]
        # Per-trace-point [value, uncertainty] storage for each direction.
        free_energy_trace_f = np.zeros([n_trace, 2], dtype=float)
        free_energy_trace_r = np.zeros([n_trace, 2], dtype=float)
        p = free_energy_trace_figure.add_subplot(plot_grid[i])
        analyzer = self.analyzers[phase_name]
        kcal = analyzer.kT / units.kilocalorie_per_mole  # kT -> kcal/mol conversion factor
        # Data crunching to get timeseries
        sampled_energies, _, _, states = analyzer.read_energies()
        n_replica, n_states, _ = sampled_energies.shape
        # Sample at index 0 is actually the minimized structure and NOT from the equilibrium distribution
        # This throws off all of the equilibrium data
        sampled_energies = sampled_energies[:, :, discard_from_start:]
        states = states[:, discard_from_start:]
        total_iterations = sampled_energies.shape[-1]
        for trace_factor in range(n_trace, 0, -1):  # Reverse order tracing
            trace_percent = trace_spacing*trace_factor
            j = trace_factor - 1  # Indexing
            kept_iterations = int(np.ceil(trace_percent*total_iterations))
            # Forward keeps the first kept_iterations samples; reverse keeps the
            # last kept_iterations samples, traversed newest-first.
            u_forward = sampled_energies[:, :, :kept_iterations]
            s_forward = states[:, :kept_iterations]
            u_reverse = sampled_energies[:, :, -1:-kept_iterations-1:-1]
            s_reverse = states[:, -1:-kept_iterations - 1:-1]
            for energy_sub, state_sub, storage in [
                    (u_forward, s_forward, free_energy_trace_f),
                    (u_reverse, s_reverse, free_energy_trace_r)]:
                u_n = analyzer.get_effective_energy_timeseries(energies=energy_sub,
                                                               replica_state_indices=state_sub)
                i_t, g_i, n_effective_i = analyze.multistate.get_equilibration_data_per_sample(u_n)
                # Pick the equilibration split that maximizes effective samples.
                i_max = n_effective_i.argmax()
                number_equilibrated = i_t[i_max]
                g_t = g_i[i_max]
                if not self.use_full_trajectory:
                    energy_sub = analyze.multistate.utils.remove_unequilibrated_data(
                        energy_sub, number_equilibrated, -1)
                    state_sub = analyze.multistate.utils.remove_unequilibrated_data(
                        state_sub, number_equilibrated, -1)
                    energy_sub = analyze.multistate.utils.subsample_data_along_axis(
                        energy_sub, g_t, -1)
                    state_sub = analyze.multistate.utils.subsample_data_along_axis(
                        state_sub, g_t, -1)
                samples_per_state = np.zeros([n_states], dtype=int)
                unique_sampled_states, counts = np.unique(state_sub, return_counts=True)
                # Assign those counts to the correct range of states
                samples_per_state[unique_sampled_states] = counts
                mbar = MBAR(energy_sub, samples_per_state)
                fe_data = mbar.getFreeEnergyDifferences(compute_uncertainty=True)
                # Trap theta_ij output: some pymbar versions also return Theta.
                try:
                    fe, dfe, _ = fe_data
                except ValueError:
                    fe, dfe = fe_data
                ref_i, ref_j = analyzer.reference_states
                storage[j, :] = fe[ref_i, ref_j] * kcal, dfe[ref_i, ref_j] * kcal
        format_trace_plot(p, free_energy_trace_f, free_energy_trace_r)
        p.set_title("{} Phase".format(phase_name.title()), fontsize=20)
        traces[phase_name]['f'] = free_energy_trace_f
        traces[phase_name]['r'] = free_energy_trace_r
        serial['forward'] = free_energy_trace_f.tolist()
        serial['reverse'] = free_energy_trace_r.tolist()
    # Finally handle last combined plot: sum phase values, add uncertainties in quadrature.
    combined_trace_f = np.zeros([n_trace, 2], dtype=float)
    combined_trace_r = np.zeros([n_trace, 2], dtype=float)
    for phase_name in self.phase_names:
        phase_f = traces[phase_name]['f']
        phase_r = traces[phase_name]['r']
        combined_trace_f[:, 0] += phase_f[:, 0]
        combined_trace_f[:, 1] = np.sqrt(combined_trace_f[:, 1]**2 + phase_f[:, 1]**2)
        combined_trace_r[:, 0] += phase_r[:, 0]
        combined_trace_r[:, 1] = np.sqrt(combined_trace_r[:, 1] ** 2 + phase_r[:, 1] ** 2)
    p = free_energy_trace_figure.add_subplot(plot_grid[-1])
    format_trace_plot(p, combined_trace_f, combined_trace_r)
    p.set_title("Combined Phases", fontsize=20)
    return free_energy_trace_figure
def compute_hydration_energy(entry, parameters, platform_name="CPU"):
    """
    Compute hydration energy of a single molecule given a GBSA parameter set.

    ARGUMENTS

    entry (dict) - database entry carrying 'molecule' (OEMol with GBSA atom types),
        'iupac', 'system', 'solvated_system', 'temperature', and cached sample
        data 'x_n' (positions, nm) / 'u_n' (reduced energies)
    parameters (dict) - parameters for GBSA atom types

    OPTIONAL ARGUMENTS

    platform_name (str) - OpenMM platform used for energy evaluation (default: "CPU")

    RETURNS

    DeltaG_in_kT (float) - hydration free energy in units of kT (dimensionless),
        or numpy.inf if the MBAR estimate fails to converge

    """
    # FIX: honor the platform_name argument (the original ignored it and
    # hard-coded 'CPU'); the default preserves the old behavior.
    platform = openmm.Platform.getPlatformByName(platform_name)

    from pymbar import MBAR

    molecule = entry['molecule']
    iupac_name = entry['iupac']
    cid = molecule.GetData('cid')

    # Retrieve OpenMM System.
    vacuum_system = entry['system']
    solvent_system = copy.deepcopy(entry['solvated_system'])

    # Get nonbonded force.
    forces = { solvent_system.getForce(index).__class__.__name__ : solvent_system.getForce(index) for index in range(solvent_system.getNumForces()) }
    nonbonded_force = forces['NonbondedForce']
    gbsa_force = forces['CustomGBForce']

    # Build indexable list of atoms.
    atoms = [atom for atom in molecule.GetAtoms()]
    natoms = len(atoms)

    # Integrators are only needed to construct Contexts; we never propagate dynamics.
    # (The original assigned 'timestep' twice; keep the single effective value.)
    timestep = 2.0 * units.femtosecond
    solvent_integrator = openmm.VerletIntegrator(timestep)
    vacuum_integrator = openmm.VerletIntegrator(timestep)

    # Assign GBSA parameters.
    for (atom_index, atom) in enumerate(atoms):
        [charge, sigma, epsilon] = nonbonded_force.getParticleParameters(atom_index)
        atomtype = atom.GetStringData("gbsa_type") # GBSA atomtype
        radius = parameters['%s_%s' % (atomtype, 'radius')] * units.angstroms
        scalingFactor = parameters['%s_%s' % (atomtype, 'scalingFactor')]
        gbsa_force.setParticleParameters(atom_index, [charge, radius, scalingFactor])

    solvent_context = openmm.Context(solvent_system, solvent_integrator, platform)
    vacuum_context = openmm.Context(vacuum_system, vacuum_integrator, platform)

    # Compute energy differences.
    temperature = entry['temperature']
    kT = kB * temperature
    beta = 1.0 / kT

    initial_time = time.time()
    x_n = entry['x_n']
    u_n = entry['u_n']
    nsamples = len(u_n)
    nstates = 3 # number of thermodynamic states: sampled, vacuum, solvent

    # Re-evaluate every cached configuration under vacuum and solvent Hamiltonians.
    # Only state 0 contributed samples; states 1-2 are reweighting targets.
    u_kln = np.zeros([nstates, nstates, nsamples], np.float64)
    for sample in range(nsamples):
        positions = units.Quantity(x_n[sample,:,:], units.nanometers)
        u_kln[0,0,sample] = u_n[sample]
        vacuum_context.setPositions(positions)
        vacuum_state = vacuum_context.getState(getEnergy=True)
        u_kln[0,1,sample] = beta * vacuum_state.getPotentialEnergy()
        solvent_context.setPositions(positions)
        solvent_state = solvent_context.getState(getEnergy=True)
        u_kln[0,2,sample] = beta * solvent_state.getPotentialEnergy()
    N_k = np.zeros([nstates], np.int32)
    N_k[0] = nsamples
    mbar = MBAR(u_kln, N_k)
    try:
        df_ij, ddf_ij, _ = mbar.getFreeEnergyDifferences()
    except linalg.LinAlgError:
        # MBAR covariance solve failed; signal an unusable parameter set.
        return np.inf

    # Hydration free energy = f(solvent) - f(vacuum), in kT.
    DeltaG_in_kT = df_ij[1,2]
    dDeltaG_in_kT = ddf_ij[1,2]

    final_time = time.time()
    elapsed_time = final_time - initial_time
    #print "%48s | %48s | reweighting took %.3f s" % (cid, iupac_name, elapsed_time)

    # Clean up.
    del solvent_context, solvent_integrator
    del vacuum_context, vacuum_integrator

    print(DeltaG_in_kT)
    print(type(DeltaG_in_kT))

    return DeltaG_in_kT
#try an on the fly mbar estimation # Subsample data to extract uncorrelated equilibrium timeseries N_k = np.zeros([nstates], np.int32) # number of uncorrelated samples for k in range(nstates): [nequil, g, Neff_max] = timeseries.detectEquilibration(u_kln[k, k, :]) indices = timeseries.subsampleCorrelatedData(u_kln[k, k, :], g=g) N_k[k] = len(indices) u_kln[k, :, 0:N_k[k]] = u_kln[k, :, indices].T # Compute free energy differences and statistical uncertainties mbar = MBAR(u_kln, N_k, verbose=True, method="adaptive", relative_tolerance=1e-10) #, initialize="BAR") [DeltaF_ij, dDeltaF_ij, Theta_ij] = mbar.getFreeEnergyDifferences(uncertainty_method='svd-ew') print('DeltaF_ij (kcal/mol):') print(DeltaF_ij[0, nstates - 1] * 298.0 * 0.001987204) mbar_fe = DeltaF_ij[0, nstates - 1] * 298.0 * 0.001987204 dmbar_fe = dDeltaF_ij[0, nstates - 1] * 298.0 * 0.001987204 #write the free energy mbar_file = open("freenrg-MBAR.dat", "w") mbar_file.write("\n") mbar_file.write("Free energy differences matrix from MBAR in reduced units:") mbar_file.write(DeltaF_ij) mbar_file.write("\n") mbar_file.write("Free energy MBAR: %.4f +/- %.4f\n" (mbar_fe, dmbar_fe)) mbar_file.close()
print("======================================") print(" Initializing MBAR ") print("======================================") # Estimate free energies from simulation using MBAR. print("Estimating relative free energies from simulation (this may take a while)...") # Initialize the MBAR class, determining the free energies. mbar = MBAR(u_kln, N_k, relative_tolerance=1.0e-10, verbose=True) # Get matrix of dimensionless free energy differences and uncertainty estimate. print("=============================================") print(" Testing getFreeEnergyDifferences ") print("=============================================") results = mbar.getFreeEnergyDifferences() Delta_f_ij_estimated = results['Delta_f'] dDelta_f_ij_estimated = results['dDelta_f'] # Compute error from analytical free energy differences. Delta_f_ij_error = Delta_f_ij_estimated - Delta_f_ij_analytical print("Error in free energies is:") print(Delta_f_ij_error) print("Uncertainty in free energies is:") print(dDelta_f_ij_estimated) print("Standard deviations away is:") # mathematical manipulation to avoid dividing by zero errors; we don't care # about the diagnonals, since they are identically zero. df_ij_mod = dDelta_f_ij_estimated + numpy.identity(K)
def estimate_free_energies(ncfile, ndiscard=0, nuse=None):
    """Estimate free energies of all alchemical states.

    ARGUMENTS
      ncfile (NetCDF) - input YANK netcdf file

    OPTIONAL ARGUMENTS
      ndiscard (int) - number of iterations to discard to equilibration
      nuse (int) - maximum number of iterations to use (after discarding)

    RETURNS
      (Deltaf_ij, dDeltaf_ij) - matrices of dimensionless free energy
      differences between all pairs of states and their uncertainties.

    TODO: Automatically determine 'ndiscard'.
    """
    # Get current dimensions.
    niterations = ncfile.variables['energies'].shape[0]
    nstates = ncfile.variables['energies'].shape[1]
    natoms = ncfile.variables['energies'].shape[2]
    # Extract energies.
    print "Reading energies..."
    energies = ncfile.variables['energies']
    # u_kln_replica[replica, state, iteration]: reduced energies indexed by replica.
    u_kln_replica = zeros([nstates, nstates, niterations], float64)
    for n in range(niterations):
        u_kln_replica[:, :, n] = energies[n, :, :]
    print "Done."
    # Deconvolute replicas: re-index energies by the thermodynamic state each
    # replica occupied at that iteration.
    print "Deconvoluting replicas..."
    u_kln = zeros([nstates, nstates, niterations], float64)
    for iteration in range(niterations):
        state_indices = ncfile.variables['states'][iteration, :]
        u_kln[state_indices, :, iteration] = energies[iteration, :, :]
    print "Done."
    # Compute total negative log probability over all iterations.
    u_n = zeros([niterations], float64)
    for iteration in range(niterations):
        u_n[iteration] = sum(diagonal(u_kln[:, :, iteration]))
    #print u_n  # DEBUG
    # Dump the timeseries for offline inspection.
    outfile = open('u_n.out', 'w')
    for iteration in range(niterations):
        outfile.write("%8d %24.3f\n" % (iteration, u_n[iteration]))
    outfile.close()
    # Discard initial data to equilibration.
    u_kln_replica = u_kln_replica[:, :, ndiscard:]
    u_kln = u_kln[:, :, ndiscard:]
    u_n = u_n[ndiscard:]
    # Truncate to number of specified conformations to use
    if (nuse):
        u_kln_replica = u_kln_replica[:, :, 0:nuse]
        u_kln = u_kln[:, :, 0:nuse]
        u_n = u_n[0:nuse]
    # Subsample data to obtain uncorrelated samples, using the total negative
    # log probability u_n as the decorrelation observable.
    N_k = zeros(nstates, int32)
    indices = timeseries.subsampleCorrelatedData(
        u_n)  # indices of uncorrelated samples
    #indices = range(0,u_n.size) # DEBUG - assume samples are uncorrelated
    N = len(indices)  # number of uncorrelated samples
    N_k[:] = N
    u_kln[:, :, 0:N] = u_kln[:, :, indices]
    print "number of uncorrelated samples:"
    print N_k
    print ""
    #===================================================================================================
    # Estimate free energy difference with MBAR.
    #===================================================================================================
    # Initialize MBAR (computing free energy estimates, which may take a while)
    print "Computing free energy differences..."
    mbar = MBAR(u_kln,
                N_k,
                verbose=False,
                method='adaptive',
                maximum_iterations=50000
                )  # use slow self-consistent-iteration (the default)
    #mbar = MBAR(u_kln, N_k, verbose = False, method = 'self-consistent-iteration', maximum_iterations = 50000) # use slow self-consistent-iteration (the default)
    #mbar = MBAR(u_kln, N_k, verbose = True, method = 'Newton-Raphson') # use faster Newton-Raphson solver
    # Get matrix of dimensionless free energy differences and uncertainty estimate.
    print "Computing covariance matrix..."
    (Deltaf_ij, dDeltaf_ij) = mbar.getFreeEnergyDifferences(uncertainty_method='svd-ew')
    # Matrix of free energy differences
    print "Deltaf_ij:"
    for i in range(nstates):
        for j in range(nstates):
            print "%8.3f" % Deltaf_ij[i, j],
        print ""
    # print Deltaf_ij
    # Matrix of uncertainties in free energy difference (expectations standard deviations of the estimator about the true free energy)
    print "dDeltaf_ij:"
    for i in range(nstates):
        for j in range(nstates):
            print "%8.3f" % dDeltaf_ij[i, j],
        print ""
    # Return free energy differences and an estimate of the covariance.
    return (Deltaf_ij, dDeltaf_ij)
u_kln_subsampled = numpy.zeros([K, K, nprod_iterations], numpy.float64) # subsampled data for k in range(K): # Get indices of uncorrelated samples. indices = subsampleCorrelatedData(u_kln[k, k, :]) # Store only uncorrelated data. N_k[k] = len(indices) for l in range(K): u_kln_subsampled[k, l, 0:len(indices)] = u_kln[k, l, indices] print "Number of uncorrelated samples per state:" print N_k # ============================================================================= # Analyze with MBAR to compute free energy differences and statistical errors. # ============================================================================= print "Analyzing with MBAR..." mbar = MBAR(u_kln_subsampled, N_k) [Deltaf_ij, dDeltaf_ij] = mbar.getFreeEnergyDifferences() print "Free energy differences (in kT)" print Deltaf_ij print "Statistical errors (in kT)" print dDeltaf_ij # ============================================================================= # Report result. # ============================================================================= print "Free energy of inserting argon particle: %.3f +- %.3f kT" % ( Deltaf_ij[0, K - 1], dDeltaf_ij[0, K - 1])
def free_energy_trace(self, discard_from_start=1, n_trace=10):
    """
    Trace the free energy by keeping fewer and fewer samples in both forward
    and reverse direction.

    Parameters
    ----------
    discard_from_start : int, optional, default=1
        Number of initial samples to drop before tracing. Sample 0 is the
        minimized structure, not a draw from the equilibrium distribution.
    n_trace : int, optional, default=10
        Number of evenly spaced trace points between 0% and 100% of the
        analyzed samples.

    Returns
    -------
    free_energy_trace_figure : matplotlib.figure
        Figure showing the equilibration between both phases
    """
    trace_spacing = 1.0/n_trace

    def format_trace_plot(plot: plt.Axes, trace_forward: np.ndarray, trace_reverse: np.ndarray):
        # Helper: draw forward/reverse traces (value +/- 2 sigma error bars) on
        # one axes, plus a shaded band at the full-data forward estimate.
        x = np.arange(n_trace + 1)[1:] * trace_spacing * 100
        plot.errorbar(x, trace_forward[:, 0], yerr=2 * trace_forward[:, 1], ecolor='b', elinewidth=0, mec='none',
                      mew=0, linestyle='None', zorder=10)
        plot.plot(x, trace_forward[:, 0], 'b-', marker='o', mec='b', mfc='w', label='Forward', zorder=20,)
        plot.errorbar(x, trace_reverse[:, 0], yerr=2 * trace_reverse[:, 1], ecolor='r', elinewidth=0, mec='none',
                      mew=0, linestyle='None', zorder=10)
        plot.plot(x, trace_reverse[:, 0], 'r-', marker='o', mec='r', mfc='w', label='Reverse', zorder=20)
        # Horizontal band: +/- 2 sigma around the 100%-of-samples forward value.
        y_fill_upper = [trace_forward[-1, 0] + 2 * trace_forward[-1, 1]] * 2
        y_fill_lower = [trace_forward[-1, 0] - 2 * trace_forward[-1, 1]] * 2
        xlim = [0, 100]
        plot.fill_between(xlim, y_fill_lower, y_fill_upper, color='orchid', zorder=5)
        plot.set_xlim(xlim)
        plot.legend()
        plot.set_xlabel("% Samples Analyzed", fontsize=20)
        plot.set_ylabel(r"$\Delta G$ in kcal/mol", fontsize=20)

    # Adjust figure size
    plt.rcParams['figure.figsize'] = 15, 6 * (self.nphases + 1) * 2
    plot_grid = gridspec.GridSpec(self.nphases + 1, 1)  # Vertical distribution
    free_energy_trace_figure = plt.figure()
    # Add some space between the figures
    free_energy_trace_figure.subplots_adjust(hspace=0.4)
    traces = {}
    for i, phase_name in enumerate(self.phase_names):
        traces[phase_name] = {}
        # Ensure the serialization dict has a phase -> "free_energy" slot.
        if phase_name not in self._serialized_data:
            self._serialized_data[phase_name] = {}
        serial = self._serialized_data[phase_name]
        if "free_energy" not in serial:
            serial["free_energy"] = {}
        serial = serial["free_energy"]
        # Column 0: free energy estimate; column 1: its uncertainty (both kcal/mol).
        free_energy_trace_f = np.zeros([n_trace, 2], dtype=float)
        free_energy_trace_r = np.zeros([n_trace, 2], dtype=float)
        p = free_energy_trace_figure.add_subplot(plot_grid[i])
        analyzer = self.analyzers[phase_name]
        # Conversion factor from kT to kcal/mol for this analyzer's temperature.
        kcal = analyzer.kT / units.kilocalorie_per_mole
        # Data crunching to get timeseries
        sampled_energies, _, _, states = analyzer.read_energies()
        n_replica, n_states, _ = sampled_energies.shape
        # Sample at index 0 is actually the minimized structure and NOT from the equilibrium distribution
        # This throws off all of the equilibrium data
        sampled_energies = sampled_energies[:, :, discard_from_start:]
        states = states[:, discard_from_start:]
        total_iterations = sampled_energies.shape[-1]
        for trace_factor in range(n_trace, 0, -1):  # Reverse order tracing
            trace_percent = trace_spacing*trace_factor
            j = trace_factor - 1  # Indexing
            kept_iterations = int(np.ceil(trace_percent*total_iterations))
            # Forward: first kept_iterations samples; reverse: last
            # kept_iterations samples walked backwards (negative stride).
            u_forward = sampled_energies[:, :, :kept_iterations]
            s_forward = states[:, :kept_iterations]
            u_reverse = sampled_energies[:, :, -1:-kept_iterations-1:-1]
            s_reverse = states[:, -1:-kept_iterations - 1:-1]
            for energy_sub, state_sub, storage in [
                    (u_forward, s_forward, free_energy_trace_f), (u_reverse, s_reverse, free_energy_trace_r)]:
                # Detect equilibration / statistical inefficiency on this slice.
                u_n = analyzer.get_effective_energy_timeseries(energies=energy_sub, replica_state_indices=state_sub)
                i_t, g_i, n_effective_i = analyze.multistate.get_equilibration_data_per_sample(u_n)
                # Pick the start point that maximizes the effective sample count.
                i_max = n_effective_i.argmax()
                number_equilibrated = i_t[i_max]
                g_t = g_i[i_max]
                if not self.use_full_trajectory:
                    # Drop unequilibrated frames, then subsample by the
                    # statistical inefficiency g_t along the iteration axis.
                    energy_sub = analyze.multistate.utils.remove_unequilibrated_data(energy_sub, number_equilibrated, -1)
                    state_sub = analyze.multistate.utils.remove_unequilibrated_data(state_sub, number_equilibrated, -1)
                    energy_sub = analyze.multistate.utils.subsample_data_along_axis(energy_sub, g_t, -1)
                    state_sub = analyze.multistate.utils.subsample_data_along_axis(state_sub, g_t, -1)
                samples_per_state = np.zeros([n_states], dtype=int)
                unique_sampled_states, counts = np.unique(state_sub, return_counts=True)
                # Assign those counts to the correct range of states
                samples_per_state[unique_sampled_states] = counts
                mbar = MBAR(energy_sub, samples_per_state)
                fe_data = mbar.getFreeEnergyDifferences(compute_uncertainty=True)
                # Trap theta_ij output
                # (pymbar may return a 2- or 3-tuple depending on version)
                try:
                    fe, dfe, _ = fe_data
                except ValueError:
                    fe, dfe = fe_data
                ref_i, ref_j = analyzer.reference_states
                # Store estimate and uncertainty converted to kcal/mol.
                storage[j, :] = fe[ref_i, ref_j] * kcal, dfe[ref_i, ref_j] * kcal
        format_trace_plot(p, free_energy_trace_f, free_energy_trace_r)
        p.set_title("{} Phase".format(phase_name.title()), fontsize=20)
        traces[phase_name]['f'] = free_energy_trace_f
        traces[phase_name]['r'] = free_energy_trace_r
        serial['forward'] = free_energy_trace_f.tolist()
        serial['reverse'] = free_energy_trace_r.tolist()
    # Finally handle last combined plot
    # Sum phase free energies; combine uncertainties in quadrature.
    combined_trace_f = np.zeros([n_trace, 2], dtype=float)
    combined_trace_r = np.zeros([n_trace, 2], dtype=float)
    for phase_name in self.phase_names:
        phase_f = traces[phase_name]['f']
        phase_r = traces[phase_name]['r']
        combined_trace_f[:, 0] += phase_f[:, 0]
        combined_trace_f[:, 1] = np.sqrt(combined_trace_f[:, 1]**2 + phase_f[:, 1]**2)
        combined_trace_r[:, 0] += phase_r[:, 0]
        combined_trace_r[:, 1] = np.sqrt(combined_trace_r[:, 1] ** 2 + phase_r[:, 1] ** 2)
    p = free_energy_trace_figure.add_subplot(plot_grid[-1])
    format_trace_plot(p, combined_trace_f, combined_trace_r)
    p.set_title("Combined Phases", fontsize=20)

    return free_energy_trace_figure
# Generate independent data samples from K one-dimensional harmonic oscillators centered at q = 0. #============================================================================================= randomsample = testsystems.harmonic_oscillators.HarmonicOscillatorsTestCase(O_k=O_k, K_k=K_k, beta=beta) [x_kn,u_kln,N_k] = randomsample.sample(N_k,mode='u_kln') # get the unreduced energies U_kln = u_kln/beta #============================================================================================= # Estimate free energies and expectations. #============================================================================================= # Initialize the MBAR class, determining the free energies. mbar = MBAR(u_kln, N_k, relative_tolerance=1.0e-10,verbose=False) # use fast Newton-Raphson solver results = mbar.getFreeEnergyDifferences(return_dict=True) Deltaf_ij_estimated = results['Delta_f'] dDeltaf_ij_estimated = results['dDelta_f'] # Compute error from analytical free energy differences. Deltaf_ij_error = Deltaf_ij_estimated - Deltaf_ij_analytical # Estimate the expectation of the mean-squared displacement at each condition. if observe == 'RMS displacement': A_kn = numpy.zeros([K,K,N_max], dtype = numpy.float64); for k in range(0,K): for l in range(0,K): A_kn[k,l,0:N_k[k]] = (x_kn[k,0:N_k[k]] - O_k[l])**2 # observable is the squared displacement # observable is the potential energy, a 3D array since the potential energy is a function of # thermodynamic state
# Bootstrap the UWHAM estimates: resample the data, re-solve, and collect
# free energies (f1_bs) and observable expectations (xbar1_bs) per trial.
xbar1_bs = zeros((bootstrap_trials, xbar1.size))
for trial in xrange(bootstrap_trials):
    msmle.resample()
    msmle.solve_uwham(f1)
    f1_bs[trial] = msmle.f
    # Shift so state 0 is the free-energy reference.
    f1_bs[trial] -= msmle.f[0]
    xbar1_bs[trial] = msmle.compute_expectations(test.x_jn, False)[0]
# Bootstrap error estimates: std-dev of free energies (excluding the
# reference state 0) and variance of the expectations.
ferr1_bs = f1_bs.std(axis=0)[1:]
varxbar1_bs = xbar1_bs.var(axis=0)
# Restore the original (un-resampled) data set.
msmle.revert_sample()
f1 = f1[1:]

if do_pymbar:
    # Optionally compute the same quantities with pymbar for comparison.
    try:
        mbar = MBAR(test.data, test.data_size)
        f2, ferr2, t = mbar.getFreeEnergyDifferences()
        # Differences relative to state 0, dropping the zero self-difference.
        f2 = f2[0][1:]
        ferr2 = ferr2[0][1:]
        xbar2, varxbar2 = mbar.computeExpectations(test.x_jn)
        skipmbar = False
    except:
        # NOTE(review): bare except deliberately treats any pymbar failure as
        # "skip the comparison" rather than aborting the benchmark.
        print('MBAR choked!')
        skipmbar = True
        pass
else:
    skipmbar = True

def print_float_array(msg, arr):
    # Print a labelled, fixed-width row of floats.
    print('%-16s '%msg + ' '.join(('% 6.4f'%x for x in arr)))

print('samples:', test.data_size)
print "======================================"
print " Initializing MBAR "
print "======================================"

# Estimate free energies from simulation using MBAR.
print "Estimating relative free energies from simulation (this may take a while)..."

# Initialize the MBAR class, determining the free energies.
mbar = MBAR(u_kln, N_k, relative_tolerance=1.0e-10, verbose=True)

# Get matrix of dimensionless free energy differences and uncertainty estimate.
print "============================================="
print " Testing getFreeEnergyDifferences "
print "============================================="

(Delta_f_ij_estimated, dDelta_f_ij_estimated, _Theta_ij) = mbar.getFreeEnergyDifferences()

# Compute error from analytical free energy differences.
Delta_f_ij_error = Delta_f_ij_estimated - Delta_f_ij_analytical

print "Error in free energies is:"
print Delta_f_ij_error
print "Uncertainty in free energies is:"
print dDelta_f_ij_estimated

print "Standard deviations away is:"
# mathematical manipulation to avoid dividing by zero errors; we don't care
# about the diagnonals, since they are identically zero.
df_ij_mod = dDelta_f_ij_estimated + numpy.identity(K)
# Number of estimator standard deviations each error represents.
stdevs = numpy.abs(Delta_f_ij_error/df_ij_mod)
# NOTE(review): loop body continues beyond this fragment.
for k in range(K):
class MBAR(BaseEstimator):
    """Multi-state Bennett acceptance ratio (MBAR).

    Parameters
    ----------
    maximum_iterations : int, optional
        Upper bound on the number of solver iterations performed.
    relative_tolerance : float, optional
        Relative tolerance used as the convergence criterion.
    initial_f_k : np.ndarray, float, shape=(K), optional
        Initial guess for the dimensionless free energies
        (default None, which sets all f_k = 0).
    method : str, optional, default="hybr"
        The optimization routine to use. This can be any of the methods
        available via scipy.optimize.minimize() or scipy.optimize.root().
    verbose : bool, optional
        Set to True if verbose debug output is desired.

    Attributes
    ----------
    delta_f_ : DataFrame
        The estimated dimensionless free energy difference between each state.
    d_delta_f_ : DataFrame
        The estimated statistical uncertainty (one standard deviation) in
        dimensionless free energy differences.
    theta_ : DataFrame
        The theta matrix.
    states_ : list
        Lambda states for which free energy differences were obtained.
    """

    def __init__(self, maximum_iterations=10000, relative_tolerance=1.0e-7,
                 initial_f_k=None, method='hybr', verbose=False):
        self.maximum_iterations = maximum_iterations
        self.relative_tolerance = relative_tolerance
        self.initial_f_k = initial_f_k
        # pymbar expects the solver selection as a "protocol": a list of
        # option dictionaries.
        self.method = [dict(method=method)]
        self.verbose = verbose

        # Handle for the underlying pymbar.MBAR object; created in fit().
        self._mbar = None

    def fit(self, u_nk):
        """
        Compute overlap matrix of reduced potentials using multi-state
        Bennett acceptance ratio.

        Parameters
        ----------
        u_nk : DataFrame
            u_nk[n, k] is the reduced potential energy of uncorrelated
            configuration n evaluated at state k.
        """
        # Sort so that all rows sampled at the same state form contiguous blocks.
        state_levels = u_nk.index.names[1:]
        u_nk = u_nk.sort_index(level=state_levels)

        # Count how many samples were drawn from each state; states that were
        # never sampled contribute zero.
        grouped = u_nk.groupby(level=state_levels)
        N_k = []
        for state in u_nk.columns:
            N_k.append(len(grouped.get_group(state)) if state in grouped.groups else 0)

        self._mbar = MBAR_(u_nk.T, N_k,
                           maximum_iterations=self.maximum_iterations,
                           relative_tolerance=self.relative_tolerance,
                           initial_f_k=self.initial_f_k,
                           solver_protocol=self.method,
                           verbose=self.verbose)

        self.states_ = u_nk.columns.values.tolist()

        # Set estimator attributes as DataFrames labelled by lambda state.
        delta_f, d_delta_f, theta = self._mbar.getFreeEnergyDifferences(return_theta=True)
        self.delta_f_ = pd.DataFrame(delta_f, columns=self.states_, index=self.states_)
        self.d_delta_f_ = pd.DataFrame(d_delta_f, columns=self.states_, index=self.states_)
        self.theta_ = pd.DataFrame(theta, columns=self.states_, index=self.states_)

        return self

    def predict(self, u_ln):
        # Prediction is not implemented for this estimator.
        pass

    @property
    def overlap_matrix(self):
        r"""MBAR overlap matrix.

        The estimated state overlap matrix :math:`O_{ij}` is an estimate of
        the probability of observing a sample from state :math:`i` in state
        :math:`j`.

        The :attr:`overlap_matrix` is computed on-the-fly. Assign it to a
        variable if you plan to re-use it.

        See Also
        --------
        pymbar.mbar.MBAR.computeOverlap
        """
        return self._mbar.computeOverlap()['matrix']
# Total number of snapshots across all simulated (T, mu) conditions.
sumN_k = nSnapshots*len(Temp_sim)
# Flatten particle counts into a single sample vector.
Nmol_kn = N_all.reshape([N_all.size])

# Build the reduced-potential matrix: row iT is every snapshot (from all
# simulations jT) re-evaluated at condition (Temp_sim[iT], mu_sim[iT]).
u_kn_sim = np.zeros([len(Temp_sim),nSnapshots*len(Temp_sim)])
for iT, (Temp, mu) in enumerate(zip(Temp_sim, mu_sim)):
    for jT in range(len(Temp_sim)):
        # Snapshots from simulation jT occupy a contiguous column block.
        jstart = nSnapshots*jT
        jend = jstart+nSnapshots
        u_kn_sim[iT,jstart:jend] = U_to_u(U_all[jT],Temp,mu,N_all[jT])

mbar = MBAR(u_kn_sim,N_k)

Deltaf_ij = mbar.getFreeEnergyDifferences(return_theta=False)[0]
#print "effective sample numbers"
#print (mbar.computeEffectiveSampleNumber())
#print('\nWhich is approximately '+str(mbar.computeEffectiveSampleNumber()/sumN_k*100.)+'%')

# Free energies of all states relative to the first simulated condition.
f_k_sim = Deltaf_ij[0,:]

#mbar2 = MBAR(u_kn_sim,N_k,initial_f_k=f_k_sim)
#
#Deltaf_ij2 = mbar2.getFreeEnergyDifferences(return_theta=False)[0]
#print "effective sample numbers"
#print (mbar2.computeEffectiveSampleNumber())
#print('\nWhich is approximately '+str(mbar2.computeEffectiveSampleNumber()/sumN_k*100.)+'%')

#Nmolk, dNmolk = mbar.computeExpectations(Nmol_kn) # Average number of molecules
#Nmolk_alt = np.zeros(len(N_k))
# Generate independent data samples from K one-dimensional harmonic oscillators centered at q = 0. #============================================================================================= randomsample = testsystems.harmonic_oscillators.HarmonicOscillatorsTestCase(O_k=O_k, K_k=K_k, beta=beta) [x_kn,u_kln,N_k] = randomsample.sample(N_k,mode='u_kln') # get the unreduced energies U_kln = u_kln/beta #============================================================================================= # Estimate free energies and expectations. #============================================================================================= # Initialize the MBAR class, determining the free energies. mbar = MBAR(u_kln, N_k, method = 'adaptive',relative_tolerance=1.0e-10,verbose=False) # use fast Newton-Raphson solver (Deltaf_ij_estimated, dDeltaf_ij_estimated) = mbar.getFreeEnergyDifferences() # Compute error from analytical free energy differences. Deltaf_ij_error = Deltaf_ij_estimated - Deltaf_ij_analytical # Estimate the expectation of the mean-squared displacement at each condition. if observe == 'RMS displacement': A_kn = numpy.zeros([K,K,N_max], dtype = numpy.float64); for k in range(0,K): for l in range(0,K): A_kn[k,l,0:N_k[k]] = (x_kn[k,0:N_k[k]] - O_k[l])**2 # observable is the squared displacement # observable is the potential energy, a 3D array since the potential energy is a function of # thermodynamic state elif observe == 'potential energy': A_kn = U_kln
def estimate_free_energies(ncfile, ndiscard=0, nuse=None, g=None):
    """
    Estimate free energies of all alchemical states.

    Parameters
    ----------
    ncfile : NetCDF
       Input YANK netcdf file
    ndiscard : int, optional, default=0
       Number of iterations to discard to equilibration
    nuse : int, optional, default=None
       Maximum number of iterations to use (after discarding)
    g : int, optional, default=None
       Statistical inefficiency to use if desired; if None, will be computed.

    Returns
    -------
    (Deltaf_ij, dDeltaf_ij) : tuple of arrays
       Matrix of dimensionless free energy differences between all pairs of
       states, and the corresponding uncertainty estimates (one standard
       deviation of the estimator).

    TODO
    ----
    * Automatically determine 'ndiscard'.
    """
    # Get current dimensions of the stored energies (iteration x state x state).
    niterations = ncfile.variables['energies'].shape[0]
    nstates = ncfile.variables['energies'].shape[1]

    # Extract energies into memory.
    logger.info("Reading energies...")
    energies = ncfile.variables['energies']
    u_kln_replica = np.zeros([nstates, nstates, niterations], np.float64)
    for n in range(niterations):
        u_kln_replica[:, :, n] = energies[n, :, :]
    logger.info("Done.")

    # Deconvolute replicas: re-index energy rows by thermodynamic state
    # instead of replica, using the per-iteration replica->state map.
    logger.info("Deconvoluting replicas...")
    u_kln = np.zeros([nstates, nstates, niterations], np.float64)
    for iteration in range(niterations):
        state_indices = ncfile.variables['states'][iteration, :]
        u_kln[state_indices, :, iteration] = energies[iteration, :, :]
    logger.info("Done.")

    # Compute total negative log probability over all iterations; this scalar
    # timeseries drives the decorrelation analysis below.
    u_n = np.zeros([niterations], np.float64)
    for iteration in range(niterations):
        u_n[iteration] = np.sum(np.diagonal(u_kln[:, :, iteration]))

    # DEBUG: dump the timeseries for external inspection. Context manager
    # guarantees the file is closed even if a write fails.
    with open('u_n.out', 'w') as outfile:
        for iteration in range(niterations):
            outfile.write("%8d %24.3f\n" % (iteration, u_n[iteration]))

    # Discard initial data to equilibration.
    u_kln_replica = u_kln_replica[:, :, ndiscard:]
    u_kln = u_kln[:, :, ndiscard:]
    u_n = u_n[ndiscard:]

    # Truncate to number of specified conformations to use
    if nuse:
        u_kln_replica = u_kln_replica[:, :, 0:nuse]
        u_kln = u_kln[:, :, 0:nuse]
        u_n = u_n[0:nuse]

    # Subsample data to obtain uncorrelated samples
    N_k = np.zeros(nstates, np.int32)
    indices = timeseries.subsampleCorrelatedData(u_n, g=g)  # indices of uncorrelated samples
    # indices = range(0,u_n.size) # DEBUG - assume samples are uncorrelated
    N = len(indices)  # number of uncorrelated samples
    # The same iteration indices are retained for every state.
    N_k[:] = N
    u_kln[:, :, 0:N] = u_kln[:, :, indices]
    logger.info("number of uncorrelated samples:")
    logger.info(N_k)
    logger.info("")

    #===================================================================================================
    # Estimate free energy difference with MBAR.
    #===================================================================================================

    # Initialize MBAR (computing free energy estimates, which may take a while)
    logger.info("Computing free energy differences...")
    mbar = MBAR(u_kln, N_k)

    # Get matrix of dimensionless free energy differences and uncertainty estimate.
    logger.info("Computing covariance matrix...")
    try:
        # pymbar 2 returns (Deltaf_ij, dDeltaf_ij)
        (Deltaf_ij, dDeltaf_ij) = mbar.getFreeEnergyDifferences()
    except ValueError:
        # pymbar 3 also returns theta_ij; discard it
        (Deltaf_ij, dDeltaf_ij, theta_ij) = mbar.getFreeEnergyDifferences()

    # Matrix of free energy differences
    logger.info("Deltaf_ij:")
    for i in range(nstates):
        str_row = ""
        for j in range(nstates):
            str_row += "%8.3f" % Deltaf_ij[i, j]
        logger.info(str_row)

    # Matrix of uncertainties in free energy difference (expected standard
    # deviations of the estimator about the true free energy)
    logger.info("dDeltaf_ij:")
    for i in range(nstates):
        str_row = ""
        for j in range(nstates):
            str_row += "%8.3f" % dDeltaf_ij[i, j]
        logger.info(str_row)

    # Return free energy differences and an estimate of the covariance.
    return (Deltaf_ij, dDeltaf_ij)