def _calc_singleLP_exTI_pred_mbar(Es, dEs, LPs_pred, nfr, T=300):
    """Predict TI-style dG/dlambda at each state in LPs_pred via MBAR reweighting.

    Parameters
    ----------
    Es : np.ndarray
        Potential energies, one row per state (same shape as dEs).
    dEs : np.ndarray
        Energy derivatives dE/dlambda, one row per predicted state.
    LPs_pred : sequence
        Predicted lambda points; only its length is used here.
    nfr : array-like
        Number of frames sampled from each state (passed to MBAR).
    T : float, optional
        Temperature in Kelvin used to reduce the energies (default 300).

    Returns
    -------
    np.ndarray
        MBAR-weighted average of dE/dlambda for each predicted state.
    """
    assert Es.shape == dEs.shape
    kT = kb * T  # kb is a module-level Boltzmann constant — defined elsewhere in this file
    mbar = MBAR(Es / kT, nfr)
    # One row of per-sample weights for each predicted state.
    weights = mbar.getWeights().T
    # Weighted average of dE/dlambda under each predicted state's distribution.
    return np.array([np.dot(weights[i], dEs[i]) for i in range(len(LPs_pred))])
def run_mbar(self):
    r"""Run the MBAR free energy estimate and store the results on self."""
    estimator = MBAR(self._u_kln, self._N_k, verbose=True)
    self._f_k = estimator.f_k
    delta_f, d_delta_f, _theta = estimator.getFreeEnergyDifferences()
    last = self._lambda_array.shape[0] - 1
    # End-to-end free energy difference: first lambda state -> last lambda state.
    self._deltaF_mbar = delta_f[0, last]
    self._dDeltaF_mbar = d_delta_f[0, last]
    # PMF table: column 0 = lambda values, column 1 = dimensionless free energies.
    pmf = np.zeros(shape=(self._lambda_array.shape[0], 2))
    pmf[:, 0] = self._lambda_array
    pmf[:, 1] = self._f_k
    self._pmf_mbar = pmf
def test_mbar_getWeights():
    """ testing getWeights """
    for system_generator in system_generators:
        name, test = system_generator()
        x_n, u_kn, N_k_output, s_n = test.sample(N_k, mode='u_kn')
        mbar = MBAR(u_kn, N_k)
        # Each column of the N x K weight matrix should sum to one
        # (weights are normalized over samples for every state).
        W = mbar.getWeights()
        sumrows = np.sum(W, axis=0)
        eq(sumrows, np.ones(len(sumrows)), decimal=precision)
def test_mbar_computeExpectationsInner():
    """Smoke test for the general expectations inner code (completion only)."""
    for make_system in system_generators:
        _name, testcase = make_system()
        x_n, u_kn, N_k_sampled, _s_n = testcase.sample(N_k, mode='u_kn')
        eq(N_k, N_k_sampled)
        estimator = MBAR(u_kn, N_k)
        # Observables evaluated at every sample: x, x^2 and x^3.
        observables = np.array([x_n, x_n**2, x_n**3])
        reduced_u = u_kn[:2, :]
        # (observable index, state index) pairs to evaluate.
        state_map = np.array([[0, 0], [1, 0], [2, 0], [2, 1]], int)
        _ = estimator.computeExpectationsInner(observables, reduced_u, state_map)
def test_mbar_computeExpectations_position_differences():
    """Can MBAR calculate E(x_n)??"""
    for make_system in system_generators:
        _name, testcase = make_system()
        x_n, u_kn, N_k_sampled, _s_n = testcase.sample(N_k, mode='u_kn')
        eq(N_k, N_k_sampled)
        estimator = MBAR(u_kn, N_k)
        mu_ij, sigma_ij = estimator.computeExpectations(x_n, output='differences')
        analytical_mu = testcase.analytical_observable(observable='position')
        # Z-scores of the pairwise differences against the analytical values.
        z = convert_to_differences(mu_ij, sigma_ij, analytical_mu)
        eq(z / z_scale_factor, np.zeros(np.shape(z)), decimal=0)
def test_mbar_computeOverlap():
    """Test computeOverlap against analytical results and basic invariants."""
    # Tests with identical states, which gives analytical results.
    d = len(N_k)
    even_O_k = 2.0 * np.ones(d)
    even_K_k = 0.5 * np.ones(d)
    even_N_k = 100 * np.ones(d)
    name, test = generate_ho(O_k=even_O_k, K_k=even_K_k)
    x_n, u_kn, N_k_output, s_n = test.sample(even_N_k, mode='u_kn')
    mbar = MBAR(u_kn, even_N_k)
    results = mbar.computeOverlap()
    overlap_scalar = results['scalar']
    eigenval = results['eigenvalues']
    O = results['matrix']
    # With d identical states, every overlap entry is 1/d.
    # (np.matrix is deprecated/removed in modern NumPy — use a plain ndarray.)
    reference_matrix = (1.0 / d) * np.ones([d, d])
    reference_eigenvalues = np.zeros(d)
    reference_eigenvalues[0] = 1.0
    reference_scalar = np.float64(1.0)
    eq(O, reference_matrix, decimal=precision)
    eq(eigenval, reference_eigenvalues, decimal=precision)
    eq(overlap_scalar, reference_scalar, decimal=precision)
    # Test of more straightforward examples.
    for system_generator in system_generators:
        name, test = system_generator()
        x_n, u_kn, N_k_output, s_n = test.sample(N_k, mode='u_kn')
        mbar = MBAR(u_kn, N_k)
        results = mbar.computeOverlap()
        overlap_scalar = results['scalar']
        eigenval = results['eigenvalues']
        O = results['matrix']
        # Rows of the overlap matrix should sum to one.
        sumrows = np.array(np.sum(O, axis=1))
        eq(sumrows, np.ones(np.shape(sumrows)), decimal=precision)
        eq(eigenval[0], np.float64(1.0), decimal=precision)
def test_mbar_computeExpectations_position_averages():
    """Can MBAR calculate E(x_n)??"""
    for make_system in system_generators:
        _name, testcase = make_system()
        x_n, u_kn, N_k_sampled, _s_n = testcase.sample(N_k, mode='u_kn')
        eq(N_k, N_k_sampled)
        estimator = MBAR(u_kn, N_k)
        expectations, uncertainties = estimator.computeExpectations(x_n)
        analytical = testcase.analytical_observable(observable='position')
        # Z-score against the analytical averages; loose tolerance (decimal=0).
        z = (analytical - expectations) / uncertainties
        eq(z / z_scale_factor, np.zeros(len(z)), decimal=0)
def test_mbar_computeExpectations_potential():
    """Can MBAR calculate E(u_kn)??"""
    for system_generator in system_generators:
        name, test = system_generator()
        x_n, u_kn, N_k_output, s_n = test.sample(N_k, mode='u_kn')
        eq(N_k, N_k_output)
        mbar = MBAR(u_kn, N_k)
        mu, sigma = mbar.computeExpectations(u_kn, state_dependent=True)
        mu0 = test.analytical_observable(observable='potential energy')
        # Z-score against the analytical value; loose tolerance (decimal=0).
        # (Leftover debug prints of mu/mu0 removed.)
        z = (mu0 - mu) / sigma
        eq(z / z_scale_factor, np.zeros(len(z)), decimal=0)
def test_mbar_computeEffectiveSampleNumber():
    """ testing computeEffectiveSampleNumber """
    for make_system in system_generators:
        _name, testcase = make_system()
        x_n, u_kn, N_k_sampled, _s_n = testcase.sample(N_k, mode='u_kn')
        eq(N_k, N_k_sampled)
        estimator = MBAR(u_kn, N_k)
        # The effective sample number for each state should lie strictly
        # between that state's own sample count and the total sample count.
        n_effective = estimator.computeEffectiveSampleNumber()
        n_total = np.sum(N_k)
        assert all(n_effective > N_k)
        assert all(n_effective < n_total)
def gather_dg(self, u_kln, nstates):
    """Subsample to uncorrelated equilibrium samples, then estimate free
    energy differences and their statistical uncertainties with MBAR."""
    # Number of uncorrelated equilibrium samples retained per state.
    N_k = np.zeros([nstates], np.int32)
    for state in range(nstates):
        series = u_kln[state, state, :]
        _t0, g, _neff = timeseries.detectEquilibration(series)
        uncorrelated = timeseries.subsampleCorrelatedData(series, g=g)
        N_k[state] = len(uncorrelated)
        # Compact the uncorrelated samples to the front of the time axis (in place).
        u_kln[state, :, 0:N_k[state]] = u_kln[state, :, uncorrelated].T
    estimator = MBAR(u_kln, N_k)
    DeltaF_ij, dDeltaF_ij, _theta = estimator.getFreeEnergyDifferences()
    print("Number of uncorrelated samples per state: {}".format(N_k))
    return DeltaF_ij, dDeltaF_ij
def test_mbar_computeExpectations_position2():
    """Can MBAR calculate E(x_n^2)??"""
    for make_system in system_generators:
        _name, testcase = make_system()
        x_n, u_kn, N_k_sampled, _s_n = testcase.sample(N_k, mode='u_kn')
        eq(N_k, N_k_sampled)
        estimator = MBAR(u_kn, N_k)
        out = estimator.computeExpectations(x_n**2, return_dict=True)
        mu, sigma = out['mu'], out['sigma']
        analytical = testcase.analytical_observable(observable='position^2')
        # Z-score against the analytical value; loose tolerance (decimal=0).
        z = (analytical - mu) / sigma
        eq(z / z_scale_factor, np.zeros(len(z)), decimal=0)
def test_mbar_free_energies():
    """Can MBAR calculate moderately correct free energy differences?"""
    for make_system in system_generators:
        _name, testcase = make_system()
        x_n, u_kn, N_k_sampled, _s_n = testcase.sample(N_k, mode='u_kn')
        eq(N_k, N_k_sampled)
        estimator = MBAR(u_kn, N_k)
        fe, fe_sigma, _theta = estimator.getFreeEnergyDifferences()
        # Differences relative to state 0.
        fe, fe_sigma = fe[0, 1:], fe_sigma[0, 1:]
        reference = testcase.analytical_free_energies()
        reference = reference[1:] - reference[0]
        z = (fe - reference) / fe_sigma
        eq(z / z_scale_factor, np.zeros(len(z)), decimal=0)
def test_exponential_mbar_free_energies():
    """Exponential Distribution Test: can MBAR calculate correct free energy differences?"""
    testcase = exponential_distributions.ExponentialTestCase(rates)
    x_n, u_kn, _origin = testcase.sample(N_k)
    u_ijn, N_k_sampled = convert_ukn_to_uijn(u_kn)
    eq(N_k, N_k_sampled.values)
    estimator = MBAR(u_ijn.values, N_k)
    fe, fe_sigma = estimator.getFreeEnergyDifferences()
    # Differences relative to state 0.
    fe, fe_sigma = fe[0], fe_sigma[0]
    reference = testcase.analytical_free_energies()
    z = (fe - reference) / fe_sigma
    # First component is undetermined.
    z = z[1:]
    eq(z / z_scale_factor, np.zeros(len(z)), decimal=0)
def sqdeltaW(mu_VLE):
    """Objective for VLE chemical-potential optimization: squared difference of
    total liquid-phase and vapor-phase MBAR weights for each predicted state.

    NOTE(review): relies on enclosing-scope names (U_flat, Nmol_flat, Temp_VLE,
    u_kn, jT0, N_k, f_k_guess, Ncut, U_to_u) — confirm where these are defined.
    """
    print(U_flat.shape, U_flat[:10], Nmol_flat.shape, Nmol_flat[:10])  # debug output
    # Fill the reduced-potential rows for the proposed (T, mu) states, in place.
    for jT, (Temp, mu) in enumerate(zip(Temp_VLE, mu_VLE)):
        u_kn[jT0 + jT, :] = U_to_u(U_flat, Temp, mu, Nmol_flat)
    # print(u_kn.shape,np.array(N_k).shape,f_k_guess.shape)
    mbar = MBAR(u_kn, N_k, initial_f_k=f_k_guess)
    # Partition sample weights by phase with the molecule-count cutoff Ncut:
    # samples with Nmol > Ncut are liquid-like, the remainder vapor-like.
    sumWliq = np.sum(mbar.W_nk[:, jT0:][Nmol_flat > Ncut], axis=0)
    sumWvap = np.sum(mbar.W_nk[:, jT0:][Nmol_flat <= Ncut], axis=0)
    sqdeltaW_VLE = (sumWliq - sumWvap)**2
    ### Could be advantageous to store this. But needs to be outside the function. Either as a global variable or within the optimizer
    ### I guess within the class I can store this as self.f_k_guess and update it each time the function is called
    # Deltaf_ij = mbar.getFreeEnergyDifferences(return_theta=False)[0]
    # f_k_guess = Deltaf_ij[0,:]
    return sqdeltaW_VLE
def test_mbar_computePerturbedFreeEnergeies():
    """ testing computePerturbedFreeEnergies """
    # NOTE(review): "Energeies" in this function's name is a typo; kept as-is
    # to avoid renaming the test.
    for system_generator in system_generators:
        name, test = system_generator()
        x_n, u_kn, N_k_output, s_n = test.sample(N_k, mode='u_kn')
        numN = np.sum(N_k[:2])
        # only do MBAR with the first two states
        mbar = MBAR(u_kn[:2, :numN], N_k[:2])
        # Perturbed free energies of the remaining (unsampled) states.
        fe, fe_sigma = mbar.computePerturbedFreeEnergies(u_kn[2:, :numN])
        fe, fe_sigma = fe[0, 1:], fe_sigma[0, 1:]
        print(fe, fe_sigma)
        fe0 = test.analytical_free_energies()[2:]
        fe0 = fe0[1:] - fe0[0]
        z = (fe - fe0) / fe_sigma
        eq(z / z_scale_factor, np.zeros(len(z)), decimal=0)
def __init__(self, ani_model: AlchemicalANI, ani_trajs: list, potential_energy_trajs: list, lambdas, max_snapshots_per_window=50):
    """Build an MBAR estimator over alchemical ANI trajectories.

    Parameters
    ----------
    ani_model : AlchemicalANI
        Model whose ``lambda_value`` is set per state and whose
        ``calculate_energy`` reduces each snapshot's energy.
    ani_trajs : list
        One trajectory per lambda window (indexable, with an ``.xyz`` array).
    potential_energy_trajs : list
        One potential-energy time series per lambda window, used for
        equilibration detection.
    lambdas : sequence
        Alchemical lambda values, one per window.
    max_snapshots_per_window : int, optional
        Cap on the number of thinned snapshots kept per window (default 50).
    """
    K = len(lambdas)
    assert (len(ani_trajs) == K)
    assert (len(potential_energy_trajs) == K)
    self.ani_model = ani_model
    self.ani_trajs = ani_trajs
    self.potential_energy_trajs = potential_energy_trajs
    self.lambdas = lambdas
    # Thin each trajectory based on automatic equilibration detection.
    N_k = []
    snapshots = []
    for i in range(K):
        traj = self.ani_trajs[i]
        # equil = equilibration onset index, g = statistical inefficiency.
        equil, g = detectEquilibration(self.potential_energy_trajs[i])[:2]
        thinning = int(g)
        if len(traj[equil::thinning]) > max_snapshots_per_window:
            # what thinning will give me len(traj[equil::thinning]) == max_snapshots_per_window?
            thinning = int((len(traj) - equil) / max_snapshots_per_window)
        new_snapshots = list(traj[equil::thinning].xyz * unit.nanometer)[:max_snapshots_per_window]
        N_k.append(len(new_snapshots))
        snapshots.extend(new_snapshots)
    self.snapshots = snapshots
    N = len(snapshots)
    # Reduced potential of every snapshot evaluated at every lambda state.
    u_kn = np.zeros((K, N))
    for k in range(K):
        lamb = lambdas[k]
        self.ani_model.lambda_value = lamb
        for n in range(N):
            u_kn[k, n] = self.ani_model.calculate_energy(snapshots[n]) / kT
    self.mbar = MBAR(u_kn, N_k)
def test_mbar_computeMultipleExpectations():
    """Can MBAR calculate E(u_kn)??"""
    for make_system in system_generators:
        _name, testcase = make_system()
        x_n, u_kn, N_k_sampled, _s_n = testcase.sample(N_k, mode='u_kn')
        eq(N_k, N_k_sampled)
        estimator = MBAR(u_kn, N_k)
        # Two observables evaluated at every sample: x and x^2.
        observables = np.zeros([2, len(x_n)])
        observables[0, :] = x_n
        observables[1, :] = x_n**2
        state = 1
        mu, sigma, _cov = estimator.computeMultipleExpectations(observables, u_kn[state, :])
        analytical = (testcase.analytical_observable(observable='position')[state],
                      testcase.analytical_observable(observable='position^2')[state])
        # Z-score each observable against its analytical value.
        for idx, reference in enumerate(analytical):
            z = (reference - mu[idx]) / sigma[idx]
            eq(z / z_scale_factor, 0 * z, decimal=0)
def test_exponential_mbar__xkn():
    """Harmonic Oscillators Test: can MBAR calculate E(x_kn)??"""
    # NOTE(review): the function name says "exponential" but the body uses the
    # harmonic-oscillator test case — looks like a copy-paste name; confirm.
    test = harmonic_oscillators.HarmonicOscillatorsTestCase(O_k, k_k)
    x_n, u_kn, origin = test.sample(N_k)
    u_ijn, N_k_output = convert_ukn_to_uijn(u_kn)
    eq(N_k, N_k_output.values)
    mbar = MBAR(u_ijn.values, N_k)
    x_kn = convert_xn_to_x_kn(x_n)
    x_kn = x_kn.values  # Convert to numpy for MBAR
    x_kn[np.isnan(x_kn)] = 0.0  # Convert nans to 0.0
    mu, sigma = mbar.computeExpectations(x_kn)
    mu0 = test.analytical_means()
    # Z-score against the analytical means; loose tolerance (decimal=0).
    z = (mu0 - mu) / sigma
    eq(z / z_scale_factor, np.zeros(len(z)), decimal=0)
def test_exponential_mbar_xkn_squared():
    """Exponential Distribution Test: can MBAR calculate E(x_kn^2)"""
    testcase = exponential_distributions.ExponentialTestCase(rates)
    x_n, u_kn, _origin = testcase.sample(N_k)
    u_ijn, N_k_sampled = convert_ukn_to_uijn(u_kn)
    eq(N_k, N_k_sampled.values)
    estimator = MBAR(u_ijn.values, N_k)
    x_kn = convert_xn_to_x_kn(x_n) ** 2.0
    x_kn = x_kn.values  # convert to a plain ndarray for MBAR
    x_kn[np.isnan(x_kn)] = 0.0  # replace missing entries with 0.0
    mu, sigma = estimator.computeExpectations(x_kn)
    reference = testcase.analytical_x_squared()
    # Z-score against the analytical second moment; loose tolerance (decimal=0).
    z = (reference - mu) / sigma
    eq(z / z_scale_factor, np.zeros(len(z)), decimal=0)
def test_mbar_computePMF():
    """ testing computePMF """
    name, test = generate_ho()
    x_n, u_kn, N_k_output, s_n = test.sample(N_k, mode='u_kn')
    mbar = MBAR(u_kn, N_k)
    # Do a 1d PMF of the potential in the 3rd state:
    refstate = 2
    dx = 0.25
    xmin = test.O_k[refstate] - 1
    xmax = test.O_k[refstate] + 1
    within_bounds = (x_n >= xmin) & (x_n < xmax)
    # np.int was removed in NumPy 1.20+; the builtin int truncates identically.
    bin_centers = dx * np.arange(int(xmin / dx), int(xmax / dx)) + dx / 2
    bin_n = np.zeros(len(x_n), int)
    bin_n[within_bounds] = 1 + np.floor((x_n[within_bounds] - xmin) / dx)
    # Bin 0 is reserved for samples outside the domain; we ignore that state.
    # (Renamed from `range`, which shadowed the builtin.)
    nbins = np.max(bin_n) + 1
    results = mbar.computePMF(u_kn[refstate, :], bin_n, nbins, uncertainties='from-specified', pmf_reference=1, return_dict=True)
    f_t, df_t = mbar.computePMF(u_kn[refstate, :], bin_n, nbins, uncertainties='from-specified', pmf_reference=1, return_dict=False)
    f_i = results['f_i']
    df_i = results['df_i']
    # Dict and tuple return forms must agree.
    eq(f_i, f_t)
    eq(df_i, df_t)
    # Analytical harmonic PMF for comparison.
    f0_i = 0.5 * test.K_k[refstate] * (bin_centers - test.O_k[refstate])**2
    # First state is ignored, second is zero, with zero uncertainty.
    f_i, df_i = f_i[2:], df_i[2:]
    normf0_i = f0_i[1:] - f0_i[0]  # normalize to first state
    z = (f_i - normf0_i) / df_i
    eq(z / z_scale_factor, np.zeros(len(z)), decimal=0)
def sqdeltaW(mu_VLE):
    """Objective for VLE optimization: per-temperature squared difference of
    total liquid-phase and vapor-phase MBAR weights.

    NOTE(review): relies on enclosing-scope names (Temp_VLE, u_kn, jT0,
    eps_ratio, U_all_flat, N_all_flat, N_k, f_k_guess, Nmol_kn, Ncut, U_to_u)
    — confirm where these are defined.
    """
    # Fill the reduced-potential rows for the proposed (T, mu) states, in place.
    for jT, (Temp, mu) in enumerate(zip(Temp_VLE, mu_VLE)):
        u_kn[jT0+jT,:] = U_to_u(eps_ratio*U_all_flat,Temp,mu,N_all_flat)
    mbar = MBAR(u_kn,N_k,initial_f_k=f_k_guess)
    sqdeltaW_VLE = np.zeros(len(Temp_VLE))
    for jT in range(len(Temp_VLE)):
        # Split sample weights into liquid (Nmol > Ncut) and vapor phases.
        sumWliq = np.sum(mbar.W_nk[:,jT0+jT][Nmol_kn>Ncut])
        sumWvap = np.sum(mbar.W_nk[:,jT0+jT][Nmol_kn<=Ncut])
        sqdeltaW_VLE[jT] = (sumWliq - sumWvap)**2
    ### Could be advantageous to store this. But needs to be outside the function. Either as a global variable or within the optimizer
    ### I guess within the class I can store this as self.f_k_guess and update it each time the function is called
    # Deltaf_ij = mbar.getFreeEnergyDifferences(return_theta=False)[0]
    # f_k_guess = Deltaf_ij[0,:]
    return sqdeltaW_VLE
def gather_dg(self, u_kln, nstates):
    """Subsample to uncorrelated equilibrium data and return the end-state
    free energy change and its uncertainty, converted with self.kTtokcal."""
    u_kln = np.vstack(u_kln)
    # Number of uncorrelated equilibrium samples retained per state.
    N_k = np.zeros([nstates], np.int32)
    for state in range(nstates):
        series = u_kln[state, state, :]
        _t0, g, _neff = timeseries.detectEquilibration(series)
        uncorrelated = timeseries.subsampleCorrelatedData(series, g=g)
        N_k[state] = len(uncorrelated)
        # Compact the uncorrelated samples to the front of the time axis (in place).
        u_kln[state, :, 0:N_k[state]] = u_kln[state, :, uncorrelated].T
    # Compute free energy differences and statistical uncertainties.
    estimator = MBAR(u_kln, N_k)
    DeltaF_ij, dDeltaF_ij, _theta = estimator.getFreeEnergyDifferences()
    logger.debug(
        "Number of uncorrelated samples per state: {}".format(N_k))
    end = nstates - 1
    dg = DeltaF_ij[0, end] * self.kTtokcal
    ddg = dDeltaF_ij[0, end] * self.kTtokcal
    logger.debug("Relative free energy change for {0} = {1} +- {2}".format(
        self.name, dg, ddg))
    return dg, ddg
def run_mbar(self, test_overlap=True):
    r"""Run the MBAR free energy estimate; optionally compute state overlap."""
    estimator = MBAR(self._u_kln, self._N_k, verbose=True)
    self._f_k = estimator.f_k
    delta_f, d_delta_f, _theta = estimator.getFreeEnergyDifferences()
    nlambda = self._lambda_array.shape[0]
    # End-to-end free energy difference (first -> last lambda state).
    self._deltaF_mbar = delta_f[0, nlambda - 1]
    self._dDeltaF_mbar = d_delta_f[0, nlambda - 1]
    # PMF table: lambda value, f_k, and uncertainty relative to state 0.
    pmf = numpy.zeros(shape=(nlambda, 3))
    pmf[:, 0] = self._lambda_array
    pmf[:, 1] = self._f_k
    pmf[:, 2] = d_delta_f[0]
    self._pmf_mbar = pmf
    # Pairwise free energies between adjacent lambda windows.
    pairwise = numpy.zeros(shape=(nlambda - 1, 4))
    pairwise[:, 0] = self._lambda_array[:-1]
    pairwise[:, 1] = self._lambda_array[1:]
    pairwise[:, 2] = numpy.diag(delta_f, 1)
    pairwise[:, 3] = numpy.diag(d_delta_f, 1)
    self._pairwise_F = pairwise
    # Testing data overlap:
    if test_overlap:
        overlap = estimator.computeOverlap()
        self._overlap_matrix = overlap[2]
def test_mbar_computeEntropyAndEnthalpy():
    """Can MBAR calculate f_k, <u_k> and s_k ??"""
    for system_generator in system_generators:
        name, test = system_generator()
        x_n, u_kn, N_k_output, s_n = test.sample(N_k, mode='u_kn')
        eq(N_k, N_k_output)
        mbar = MBAR(u_kn, N_k)
        results = mbar.computeEntropyAndEnthalpy(u_kn, return_dict=True)
        f_t, df_t, u_t, du_t, s_t, ds_t = mbar.computeEntropyAndEnthalpy(
            u_kn, return_dict=False)
        f_ij = results['Delta_f']
        df_ij = results['dDelta_f']
        u_ij = results['Delta_u']
        du_ij = results['dDelta_u']
        s_ij = results['Delta_s']
        ds_ij = results['dDelta_s']
        # Dict and tuple return forms must agree entry for entry.
        eq(f_ij, f_t)
        eq(df_ij, df_t)
        eq(u_ij, u_t)
        eq(du_ij, du_t)
        eq(s_ij, s_t)
        eq(ds_ij, ds_t)
        fa = test.analytical_free_energies()
        ua = test.analytical_observable('potential energy')
        sa = test.analytical_entropies()
        # (Removed unused fa_ij/ua_ij/sa_ij difference matrices, which were
        # dead code built with the deprecated np.matrix API.)
        # Z-score each estimated difference matrix against its analytical values.
        z = convert_to_differences(f_ij, df_ij, fa)
        eq(z / z_scale_factor, np.zeros(np.shape(z)), decimal=0)
        z = convert_to_differences(u_ij, du_ij, ua)
        eq(z / z_scale_factor, np.zeros(np.shape(z)), decimal=0)
        z = convert_to_differences(s_ij, ds_ij, sa)
        eq(z / z_scale_factor, np.zeros(np.shape(z)), decimal=0)
def run_mbar(self, ndiscard=0, nuse=None):
    """Estimate free energies of all alchemical states.

    Parameters
    ----------
    ndiscard : int, optional, default=0
        number of iterations to discard to equilibration
    nuse : int, optional, default=None
        maximum number of iterations to use (after discarding)

    Returns
    -------
    Deltaf_ij : np.ndarray, shape=(n_states, n_states)
        The statewise free energy differences
    dDeltaf_ij : np.ndarray, shape=(n_states, n_states)
        The statewise free energy difference uncertainties
    """
    u_kln_replica, u_kln, u_n = self.get_u_kln()
    u_kln_replica, u_kln, u_n, N_k, N = self.equilibrate_and_subsample(
        u_kln_replica, u_kln, u_n, ndiscard=ndiscard, nuse=nuse)
    logger.info("Initializing MBAR and computing free energy differences...")
    # Use slow self-consistent-iteration (the default).
    mbar = MBAR(u_kln, N_k, verbose=False, method='self-consistent-iteration', maximum_iterations=50000)
    # Get matrix of dimensionless free energy differences and uncertainty estimate.
    logger.info("Computing covariance matrix...")
    (Deltaf_ij, dDeltaf_ij) = mbar.getFreeEnergyDifferences(uncertainty_method='svd-ew')
    logger.info("\n%-24s %16s\n%s" % ("Deltaf_ij", "current state", pd.DataFrame(Deltaf_ij).to_string()))
    # Fixed label: this line logs the uncertainty matrix, not Deltaf_ij.
    logger.info("\n%-24s %16s\n%s" % ("dDeltaf_ij", "current state", pd.DataFrame(dDeltaf_ij).to_string()))
    return (Deltaf_ij, dDeltaf_ij)
def test_mbar_free_energies():
    """Can MBAR calculate moderately correct free energy differences?"""
    for make_system in system_generators:
        _name, testcase = make_system()
        x_n, u_kn, N_k_sampled, _s_n = testcase.sample(N_k, mode='u_kn')
        eq(N_k, N_k_sampled)
        estimator = MBAR(u_kn, N_k)
        out = estimator.getFreeEnergyDifferences(return_dict=True)
        fe_tuple, dfe_tuple = estimator.getFreeEnergyDifferences(return_dict=False)
        fe = out['Delta_f']
        fe_sigma = out['dDelta_f']
        # Dict and tuple return forms must agree.
        eq(fe, fe_tuple)
        eq(fe_sigma, dfe_tuple)
        # Differences relative to state 0.
        fe, fe_sigma = fe[0, 1:], fe_sigma[0, 1:]
        reference = testcase.analytical_free_energies()
        reference = reference[1:] - reference[0]
        z = (fe - reference) / fe_sigma
        eq(z / z_scale_factor, np.zeros(len(z)), decimal=0)
#============================================================================================= # Generate independent data samples from K one-dimensional harmonic oscillators centered at q = 0. #============================================================================================= randomsample = testsystems.harmonic_oscillators.HarmonicOscillatorsTestCase(O_k=O_k, K_k=K_k, beta=beta) [x_kn,u_kln,N_k] = randomsample.sample(N_k,mode='u_kln') # get the unreduced energies U_kln = u_kln/beta #============================================================================================= # Estimate free energies and expectations. #============================================================================================= # Initialize the MBAR class, determining the free energies. mbar = MBAR(u_kln, N_k, method = 'adaptive',relative_tolerance=1.0e-10,verbose=False) # use fast Newton-Raphson solver (Deltaf_ij_estimated, dDeltaf_ij_estimated) = mbar.getFreeEnergyDifferences() # Compute error from analytical free energy differences. Deltaf_ij_error = Deltaf_ij_estimated - Deltaf_ij_analytical # Estimate the expectation of the mean-squared displacement at each condition. if observe == 'RMS displacement': A_kn = numpy.zeros([K,K,N_max], dtype = numpy.float64); for k in range(0,K): for l in range(0,K): A_kn[k,l,0:N_k[k]] = (x_kn[k,0:N_k[k]] - O_k[l])**2 # observable is the squared displacement # observable is the potential energy, a 3D array since the potential energy is a function of # thermodynamic state elif observe == 'potential energy':
def free_energy_trace(self, discard_from_start=1, n_trace=10):
    """
    Trace the free energy by keeping fewer and fewer samples in both forward and reverse direction

    Returns
    -------
    free_energy_trace_figure : matplotlib.figure
        Figure showing the equilibration between both phases
    """
    trace_spacing = 1.0/n_trace

    def format_trace_plot(plot: plt.Axes, trace_forward: np.ndarray, trace_reverse: np.ndarray):
        # Shared styling for per-phase and combined trace plots; column 0 of
        # each trace array is the free energy, column 1 its uncertainty.
        x = np.arange(n_trace + 1)[1:] * trace_spacing * 100
        plot.errorbar(x, trace_forward[:, 0], yerr=2 * trace_forward[:, 1], ecolor='b', elinewidth=0, mec='none', mew=0, linestyle='None', zorder=10)
        plot.plot(x, trace_forward[:, 0], 'b-', marker='o', mec='b', mfc='w', label='Forward', zorder=20,)
        plot.errorbar(x, trace_reverse[:, 0], yerr=2 * trace_reverse[:, 1], ecolor='r', elinewidth=0, mec='none', mew=0, linestyle='None', zorder=10)
        plot.plot(x, trace_reverse[:, 0], 'r-', marker='o', mec='r', mfc='w', label='Reverse', zorder=20)
        # Shaded band: +/- 2 sigma around the full-data (100%) forward estimate.
        y_fill_upper = [trace_forward[-1, 0] + 2 * trace_forward[-1, 1]] * 2
        y_fill_lower = [trace_forward[-1, 0] - 2 * trace_forward[-1, 1]] * 2
        xlim = [0, 100]
        plot.fill_between(xlim, y_fill_lower, y_fill_upper, color='orchid', zorder=5)
        plot.set_xlim(xlim)
        plot.legend()
        plot.set_xlabel("% Samples Analyzed", fontsize=20)
        plot.set_ylabel(r"$\Delta G$ in kcal/mol", fontsize=20)

    # Adjust figure size
    plt.rcParams['figure.figsize'] = 15, 6 * (self.nphases + 1) * 2
    plot_grid = gridspec.GridSpec(self.nphases + 1, 1)  # Vertical distribution
    free_energy_trace_figure = plt.figure()
    # Add some space between the figures
    free_energy_trace_figure.subplots_adjust(hspace=0.4)
    traces = {}
    for i, phase_name in enumerate(self.phase_names):
        traces[phase_name] = {}
        # Ensure the serialization dict has a free_energy slot for this phase.
        if phase_name not in self._serialized_data:
            self._serialized_data[phase_name] = {}
        serial = self._serialized_data[phase_name]
        if "free_energy" not in serial:
            serial["free_energy"] = {}
        serial = serial["free_energy"]
        free_energy_trace_f = np.zeros([n_trace, 2], dtype=float)
        free_energy_trace_r = np.zeros([n_trace, 2], dtype=float)
        p = free_energy_trace_figure.add_subplot(plot_grid[i])
        analyzer = self.analyzers[phase_name]
        kcal = analyzer.kT / units.kilocalorie_per_mole
        # Data crunching to get timeseries
        sampled_energies, _, _, states = analyzer.read_energies()
        n_replica, n_states, _ = sampled_energies.shape
        # Sample at index 0 is actually the minimized structure and NOT from the equilibrium distribution
        # This throws off all of the equilibrium data
        sampled_energies = sampled_energies[:, :, discard_from_start:]
        states = states[:, discard_from_start:]
        total_iterations = sampled_energies.shape[-1]
        for trace_factor in range(n_trace, 0, -1):  # Reverse order tracing
            trace_percent = trace_spacing*trace_factor
            j = trace_factor - 1  # Indexing
            kept_iterations = int(np.ceil(trace_percent*total_iterations))
            # Forward keeps the first kept_iterations; reverse keeps the last,
            # in reversed order.
            u_forward = sampled_energies[:, :, :kept_iterations]
            s_forward = states[:, :kept_iterations]
            u_reverse = sampled_energies[:, :, -1:-kept_iterations-1:-1]
            s_reverse = states[:, -1:-kept_iterations - 1:-1]
            for energy_sub, state_sub, storage in [
                    (u_forward, s_forward, free_energy_trace_f),
                    (u_reverse, s_reverse, free_energy_trace_r)]:
                u_n = analyzer.get_effective_energy_timeseries(energies=energy_sub, replica_state_indices=state_sub)
                i_t, g_i, n_effective_i = analyze.multistate.get_equilibration_data_per_sample(u_n)
                # Pick the equilibration split that maximizes effective samples.
                i_max = n_effective_i.argmax()
                number_equilibrated = i_t[i_max]
                g_t = g_i[i_max]
                if not self.use_full_trajectory:
                    energy_sub = analyze.multistate.utils.remove_unequilibrated_data(energy_sub, number_equilibrated, -1)
                    state_sub = analyze.multistate.utils.remove_unequilibrated_data(state_sub, number_equilibrated, -1)
                    energy_sub = analyze.multistate.utils.subsample_data_along_axis(energy_sub, g_t, -1)
                    state_sub = analyze.multistate.utils.subsample_data_along_axis(state_sub, g_t, -1)
                samples_per_state = np.zeros([n_states], dtype=int)
                unique_sampled_states, counts = np.unique(state_sub, return_counts=True)
                # Assign those counts to the correct range of states
                samples_per_state[unique_sampled_states] = counts
                mbar = MBAR(energy_sub, samples_per_state)
                fe_data = mbar.getFreeEnergyDifferences(compute_uncertainty=True)
                # Trap theta_ij output
                try:
                    fe, dfe, _ = fe_data
                except ValueError:
                    fe, dfe = fe_data
                ref_i, ref_j = analyzer.reference_states
                storage[j, :] = fe[ref_i, ref_j] * kcal, dfe[ref_i, ref_j] * kcal
        format_trace_plot(p, free_energy_trace_f, free_energy_trace_r)
        p.set_title("{} Phase".format(phase_name.title()), fontsize=20)
        traces[phase_name]['f'] = free_energy_trace_f
        traces[phase_name]['r'] = free_energy_trace_r
        serial['forward'] = free_energy_trace_f.tolist()
        serial['reverse'] = free_energy_trace_r.tolist()
    # Finally handle last combined plot
    combined_trace_f = np.zeros([n_trace, 2], dtype=float)
    combined_trace_r = np.zeros([n_trace, 2], dtype=float)
    for phase_name in self.phase_names:
        phase_f = traces[phase_name]['f']
        phase_r = traces[phase_name]['r']
        # Sum the means; combine uncertainties in quadrature.
        combined_trace_f[:, 0] += phase_f[:, 0]
        combined_trace_f[:, 1] = np.sqrt(combined_trace_f[:, 1]**2 + phase_f[:, 1]**2)
        combined_trace_r[:, 0] += phase_r[:, 0]
        combined_trace_r[:, 1] = np.sqrt(combined_trace_r[:, 1] ** 2 + phase_r[:, 1] ** 2)
    p = free_energy_trace_figure.add_subplot(plot_grid[-1])
    format_trace_plot(p, combined_trace_f, combined_trace_r)
    p.set_title("Combined Phases", fontsize=20)
    return free_energy_trace_figure
def estimate_free_energies(ncfile, ndiscard=0, nuse=None):
    """Estimate free energies of all alchemical states.

    ARGUMENTS
       ncfile (NetCDF) - input YANK netcdf file

    OPTIONAL ARGUMENTS
       ndiscard (int) - number of iterations to discard to equilibration
       nuse (int) - maximum number of iterations to use (after discarding)

    TODO: Automatically determine 'ndiscard'.

    NOTE: Python 2 syntax (print statements).
    """
    # Get current dimensions.
    niterations = ncfile.variables['energies'].shape[0]
    nstates = ncfile.variables['energies'].shape[1]
    natoms = ncfile.variables['energies'].shape[2]
    # Extract energies.
    print "Reading energies..."
    energies = ncfile.variables['energies']
    u_kln_replica = zeros([nstates, nstates, niterations], float64)
    for n in range(niterations):
        u_kln_replica[:, :, n] = energies[n, :, :]
    print "Done."
    # Deconvolute replicas: reorder so index k is the thermodynamic state,
    # not the replica.
    print "Deconvoluting replicas..."
    u_kln = zeros([nstates, nstates, niterations], float64)
    for iteration in range(niterations):
        state_indices = ncfile.variables['states'][iteration, :]
        u_kln[state_indices, :, iteration] = energies[iteration, :, :]
    print "Done."
    # Compute total negative log probability over all iterations.
    u_n = zeros([niterations], float64)
    for iteration in range(niterations):
        u_n[iteration] = sum(diagonal(u_kln[:, :, iteration]))
    #print u_n  # DEBUG
    # Dump the timeseries to disk for inspection.
    outfile = open('u_n.out', 'w')
    for iteration in range(niterations):
        outfile.write("%8d %24.3f\n" % (iteration, u_n[iteration]))
    outfile.close()
    # Discard initial data to equilibration.
    u_kln_replica = u_kln_replica[:, :, ndiscard:]
    u_kln = u_kln[:, :, ndiscard:]
    u_n = u_n[ndiscard:]
    # Truncate to number of specified conformations to use
    if (nuse):
        u_kln_replica = u_kln_replica[:, :, 0:nuse]
        u_kln = u_kln[:, :, 0:nuse]
        u_n = u_n[0:nuse]
    # Subsample data to obtain uncorrelated samples
    N_k = zeros(nstates, int32)
    indices = timeseries.subsampleCorrelatedData(
        u_n)  # indices of uncorrelated samples
    #indices = range(0,u_n.size) # DEBUG - assume samples are uncorrelated
    N = len(indices)  # number of uncorrelated samples
    N_k[:] = N
    u_kln[:, :, 0:N] = u_kln[:, :, indices]
    print "number of uncorrelated samples:"
    print N_k
    print ""
    #===================================================================================================
    # Estimate free energy difference with MBAR.
    #===================================================================================================
    # Initialize MBAR (computing free energy estimates, which may take a while)
    print "Computing free energy differences..."
    mbar = MBAR(u_kln,
                N_k,
                verbose=False,
                method='adaptive',
                maximum_iterations=50000
                )  # use slow self-consistent-iteration (the default)
    #mbar = MBAR(u_kln, N_k, verbose = False, method = 'self-consistent-iteration', maximum_iterations = 50000) # use slow self-consistent-iteration (the default)
    #mbar = MBAR(u_kln, N_k, verbose = True, method = 'Newton-Raphson') # use faster Newton-Raphson solver
    # Get matrix of dimensionless free energy differences and uncertainty estimate.
    print "Computing covariance matrix..."
    (Deltaf_ij, dDeltaf_ij) = mbar.getFreeEnergyDifferences(uncertainty_method='svd-ew')
    #
    # Matrix of free energy differences
    print "Deltaf_ij:"
    for i in range(nstates):
        for j in range(nstates):
            print "%8.3f" % Deltaf_ij[i, j],
        print ""
    # print Deltaf_ij
    #
    # Matrix of uncertainties in free energy difference (expectations standard deviations of the estimator about the true free energy)
    print "dDeltaf_ij:"
    for i in range(nstates):
        for j in range(nstates):
            print "%8.3f" % dDeltaf_ij[i, j],
        print ""
    # Return free energy differences and an estimate of the covariance.
    return (Deltaf_ij, dDeltaf_ij)
simfile.close()
print("**************************************************")
print("Estimation of free energy with MBAR ...")
# Try an on-the-fly MBAR estimation.
# Subsample data to extract uncorrelated equilibrium timeseries.
N_k = np.zeros([nstates], np.int32)  # number of uncorrelated samples
for k in range(nstates):
    [nequil, g, Neff_max] = timeseries.detectEquilibration(u_kln[k, k, :])
    indices = timeseries.subsampleCorrelatedData(u_kln[k, k, :], g=g)
    N_k[k] = len(indices)
    # Compact the uncorrelated samples to the front of the time axis (in place).
    u_kln[k, :, 0:N_k[k]] = u_kln[k, :, indices].T
# Compute free energy differences and statistical uncertainties.
mbar = MBAR(u_kln, N_k, verbose=True, method="adaptive", relative_tolerance=1e-10)  #, initialize="BAR")
[DeltaF_ij, dDeltaF_ij, Theta_ij] = mbar.getFreeEnergyDifferences(uncertainty_method='svd-ew')
# kT in kcal/mol at 298 K: R = 0.001987204 kcal/(mol K).
kT_kcal = 298.0 * 0.001987204
print('DeltaF_ij (kcal/mol):')
print(DeltaF_ij[0, nstates - 1] * kT_kcal)
mbar_fe = DeltaF_ij[0, nstates - 1] * kT_kcal
dmbar_fe = dDeltaF_ij[0, nstates - 1] * kT_kcal
# Write the free energy to disk.
mbar_file = open("freenrg-MBAR.dat", "w")
mbar_file.write("\n")
mbar_file.write("Free energy differences matrix from MBAR in reduced units:")
# BUG FIX: file.write() requires a string; passing the ndarray directly
# raised TypeError. Convert explicitly.
mbar_file.write(str(DeltaF_ij))