def finite_difference_bar(w, delta):
    """Estimate the derivative of the BAR estimate with respect to every work value.

    Each element ``w[i][j]`` is shifted by ``+delta/2`` and ``-delta/2`` in turn,
    ``pymbar.BAR`` is re-evaluated for both shifts, and the central difference
    ``(upper - lower) / delta`` is stored.

    Parameters
    ----------
    w : array-like, float, (2, N)
        ``w[0]`` and ``w[1]`` are passed to ``pymbar.BAR`` as its first and
        second work arguments. The input is not modified.
    delta : float
        Full width of the central-difference stencil.

    Returns
    -------
    np.ndarray, float, (2, N)
        Finite-difference derivative of the first value returned by
        ``pymbar.BAR`` with respect to each entry of ``w``.
    """
    # Perturb a private float copy so the caller's data never accumulates
    # rounding drift from repeated += / -= round trips.
    work = np.array(w, dtype=float)
    fd_pymbar = np.zeros_like(work)
    n_frames = work.shape[1]

    for i in range(2):
        for j in range(n_frames):
            unperturbed = work[i, j]

            # central difference
            work[i, j] = unperturbed + 0.5 * delta
            upper = pymbar.BAR(work[0], work[1])[0]
            work[i, j] -= delta
            lower = pymbar.BAR(work[0], work[1])[0]
            fd_pymbar[i, j] = (upper - lower) / delta

            # Restore the exact original value before moving on.
            work[i, j] = unperturbed

    return fd_pymbar
def get_free_energies(self, environment, n_bootstrap_iterations=10000000):
    """
    Estimate the free energies between all pairs with bidirectional transitions of chemical states
    in the given environment

    Parameters
    ----------
    environment : str
        The name of the environment for which free energies are desired
    n_bootstrap_iterations : int, optional, default=10000000
        Number of bootstrap resamples (one ``pymbar.BAR`` solve each) drawn per
        state pair. The default preserves the historical behavior; pass a
        smaller value when that many solves is not affordable.

    Returns
    -------
    free_energies : dict of (str, str): [float, float]
        Dictionary of pairwise free energies and their uncertainty, computed with bootstrapping
        (mean and standard deviation of the bootstrapped BAR estimates)
    """
    logP_without_sams = self.extract_logP_values(environment, "logP_accept", subtract_sams=True)
    free_energies = {}

    for state_pair, logP_accepts in logP_without_sams.items():
        # Element 0 is used as-is for the forward work; element 1 is negated
        # for the reverse work.
        w_F = logP_accepts[0]
        w_R = -logP_accepts[1]

        bootstrapped_bar = np.zeros(n_bootstrap_iterations)
        for i in range(n_bootstrap_iterations):
            # Resample each direction independently, with replacement, at full size.
            resampled_w_F = np.random.choice(w_F, len(w_F), replace=True)
            resampled_w_R = np.random.choice(w_R, len(w_R), replace=True)
            # Only the estimate is kept; the per-resample uncertainty is unused.
            df, _ = pymbar.BAR(resampled_w_F, resampled_w_R)
            bootstrapped_bar[i] = df

        free_energies[state_pair] = [np.mean(bootstrapped_bar), np.std(bootstrapped_bar)]

    return free_energies
def bootstrap_BAR(w_F, w_R, repeats, sample_proportion):
    """
    Bootstrap the BAR free energy estimate.

    :param w_F: ndarray, first work array handed to pymbar.BAR
    :param w_R: ndarray, second work array handed to pymbar.BAR
    :param repeats: int, number of bootstrap resamples
    :param sample_proportion: float in [0, 1], fraction of each work array drawn
                              (with replacement) per resample
    :return: (df_mean, df_std), mean and standard deviation of the finite
             bootstrap estimates
    """
    assert 0 <= sample_proportion <= 1, "sample_proportion out of range"

    n_forward = int(len(w_F) * sample_proportion)
    n_reverse = int(len(w_R) * sample_proportion)

    def _resampled_estimate():
        # Draw forward first, then reverse, then solve BAR for this resample.
        forward_draw = np.random.choice(w_F, size=n_forward, replace=True)
        reverse_draw = np.random.choice(w_R, size=n_reverse, replace=True)
        return pymbar.BAR(forward_draw, reverse_draw, compute_uncertainty=False,
                          relative_tolerance=1e-6, verbose=False)

    estimates = np.asarray([_resampled_estimate() for _ in range(repeats)])

    # Discard NaN and +/-inf estimates before computing statistics.
    finite_estimates = estimates[np.isfinite(estimates)]

    return finite_estimates.mean(), finite_estimates.std()
def bennett(w_F, w_R):
    """
    Bennett Acceptance Ratio

    C. Bennett. Efficient Estimation of Free Energy Differences from Monte Carlo Data.
        Journal of Computational Physics 22, 245-268 (1976).
    G. Crooks. Path-ensemble averages in systems driven far from equilibrium.
        Physical Review E 61, 2361-2366 (2000).
    M. Shirts, E. Bair, G. Hooker, and V. Pande. Equilibrium Free Energies from Nonequilibrium
        Measurements Using Maximum-Likelihood Methods. Physical Review Letters 91, 140601 (2003).

    Requires pymbar (https://github.com/choderalab/pymbar).

    :param w_F: ndarray with shape (NF,)
                works done in the forward direction starting from the initial (A) equilibrium
                ensemble, in units of kT
    :param w_R: ndarray with shape (NR,)
                works passed to pymbar.BAR as the reverse-direction work values, in units of kT
                (presumably collected starting from the final (B) equilibrium ensemble -- confirm
                against callers)
    :return: df_AB : float
                free energy difference between states A and B (df_AB = f_B - f_A), in units of kT
    """
    both_one_dimensional = w_F.ndim == 1 and w_R.ndim == 1
    assert both_one_dimensional, "w_F, w_R must be 1d arrays"

    # BAR also computes an uncertainty here; only the estimate is returned.
    estimate, _uncertainty = pymbar.BAR(
        w_F,
        w_R,
        relative_tolerance=0.000001,
        verbose=False,
        compute_uncertainty=True,
    )
    return estimate
def _run_mbar(u_kln, N_k):
    """Build a ``pymbar.MBAR`` object, seeding it with pairwise BAR estimates.

    Parameters
    ----------
    u_kln : 3d numpy array
        Indexed as ``u_kln[k, l, n]``; the work values below are formed from
        the first ``N_k[k]`` entries of rows ``[k, k + 1]`` and ``[k, k]``.
    N_k : 1d sequence of int
        Number of samples at each state ``k``.

    Returns
    -------
    mbar : pymbar.MBAR
    """
    K = len(N_k)
    f_k_BAR = np.zeros(K)

    # There are K - 1 adjacent pairs (k, k + 1). Iterating only K - 2 of them
    # left the last state's increment at zero, so its initial guess merely
    # duplicated the previous state's value.
    for k in range(K - 1):
        w_F = u_kln[k, k + 1, :N_k[k]] - u_kln[k, k, :N_k[k]]
        w_R = u_kln[k + 1, k, :N_k[k + 1]] - u_kln[k + 1, k + 1, :N_k[k + 1]]
        f_k_BAR[k + 1] = pymbar.BAR(w_F, w_R, relative_tolerance=0.000001,
                                    verbose=False, compute_uncertainty=False)

    # Accumulate pairwise differences into free energies relative to state 0.
    f_k_BAR = np.cumsum(f_k_BAR)

    mbar = pymbar.MBAR(u_kln, N_k, verbose=True, initial_f_k=f_k_BAR)
    return mbar
def calculate(self, temp=300.):
    """Calculate the free energy difference and return a PMF object.

    The PMF starts at 0.0 and grows by one BAR estimate per pair of
    consecutive entries in ``self.data``.

    Parameters
    ----------
    temp: float, optional
          temperature of calculation

    Returns
    -------
    PMF
        built from ``self.lambdas`` and the accumulated free energy values
    """
    beta = 1. / (sim.boltz * temp)

    pmf_values = [0.0]
    for lower_window, upper_window in zip(self.data[:-1], self.data[1:]):
        # BAR works in reduced (dimensionless) units, hence the beta factors.
        # NOTE(review): element [1] of the lower window and element [0] of the
        # upper window are used, both negated -- their meaning is defined by
        # whatever populates self.data; confirm there.
        reduced_forward = -lower_window[1] * beta
        reduced_reverse = -upper_window[0] * beta
        reduced_df = pymbar.BAR(reduced_forward, reduced_reverse)[0]
        pmf_values.append(pmf_values[-1] + reduced_df / beta)

    return PMF(self.lambdas, pmf_values)
def run_mbar(u_kln, N_k):
    """
    Build a pymbar.MBAR object, using pairwise BAR estimates between adjacent
    states as the initial guess for the free energies.

    :param u_kln: 3d numpy array, reduced potential energy
    :param N_k: 1d numpy array, number of samples at state k
    :return: mbar, an object of pymbar.MBAR
    """
    K = len(N_k)
    f_k_BAR = np.zeros(K)

    # There are K - 1 adjacent pairs (k, k+1). Iterating only K - 2 of them
    # left the last state's increment at zero, so its initial guess merely
    # duplicated the previous state's value.
    for k in range(K - 1):
        w_F = u_kln[k, k + 1, :N_k[k]] - u_kln[k, k, :N_k[k]]
        w_R = u_kln[k + 1, k, :N_k[k + 1]] - u_kln[k + 1, k + 1, :N_k[k + 1]]
        f_k_BAR[k + 1] = pymbar.BAR(w_F, w_R, relative_tolerance=0.000001,
                                    verbose=False, compute_uncertainty=False)

    # Accumulate pairwise differences into free energies relative to state 0.
    f_k_BAR = np.cumsum(f_k_BAR)

    mbar = pymbar.MBAR(u_kln, N_k, verbose=True, initial_f_k=f_k_BAR)
    return mbar
def dG_dw(w):
    """
    Gradient of the BAR free energy difference with respect to the work values.

    The free energy is first obtained from ``pymbar.BAR``; the gradient is then
    formed as minus the ratio of the two partial derivatives of ``BARzero``
    (with respect to its first and second arguments), evaluated at that solution.

    Parameters
    ---------
    w : np.ndarray, float, (2, N)
        forward and reverse work for N frames

    Returns
    ------
    np.ndarray, float, (2, N)
        the gradient of free energy difference with respect to work
    """
    free_energy, _ = pymbar.BAR(w[0], w[1])

    # argnums is given as a 1-tuple, so each gradient call returns a 1-tuple.
    grad_wrt_work = jax.grad(BARzero, argnums=(0,))
    grad_wrt_free_energy = jax.grad(BARzero, argnums=(1,))

    work_sensitivity = grad_wrt_work(w, free_energy)[0]
    free_energy_sensitivity = grad_wrt_free_energy(w, free_energy)[0]

    return -work_sensitivity / free_energy_sensitivity
def _bennett_acceptance_ratio_pymbar(forward_work, reverse_work, compute_uncertainty=True,
                                     maximum_iterations=500, relative_tolerance=1e-12):
    """pymbar reference implementation

    Runs ``pymbar.BAR`` on the numpy versions of the inputs while capturing its
    stdout, and returns tensors on the device/dtype of ``forward_work``.

    Returns ``(nan, nan)`` when the captured output contains "poor overlap" or
    when the requested uncertainty is NaN. Otherwise returns
    ``(estimate, uncertainty)`` if ``compute_uncertainty`` is set, else
    ``(estimate, None)``.
    """
    import pymbar

    tensor_options = dict(device=forward_work.device, dtype=forward_work.dtype)

    # pymbar reports problems by printing, so its stdout is captured and inspected.
    captured_stdout = io.StringIO()
    with redirect_stdout(captured_stdout):
        result = pymbar.BAR(
            w_F=as_numpy(forward_work),
            w_R=as_numpy(reverse_work),
            return_dict=False,
            compute_uncertainty=compute_uncertainty,
            maximum_iterations=maximum_iterations,
            relative_tolerance=relative_tolerance,
        )

    def _nan_tensor():
        return torch.tensor(np.nan, **tensor_options)

    reported_poor_overlap = "poor overlap" in captured_stdout.getvalue()
    if reported_poor_overlap or (compute_uncertainty and np.isnan(result[1])):
        return _nan_tensor(), _nan_tensor()

    if not compute_uncertainty:
        # Without uncertainty the result is wrapped directly rather than indexed.
        return torch.tensor(result, **tensor_options), None

    estimate = torch.tensor(result[0], **tensor_options)
    uncertainty = torch.tensor(result[1], **tensor_options)
    return estimate, uncertainty
def run_alchemical_langevin_integrator(nsteps=0, splitting="O { V R H R V } O"):
    """Check that AlchemicalNonequilibriumLangevinIntegrator reproduces the analytical free energy
    difference for a harmonic oscillator deformation, using BAR.

    Up to 6*sigma is tolerated for error.
    The total work (protocol work + shadow work) is used.

    Parameters
    ----------
    nsteps : int, optional, default=0
        Passed to the nonequilibrium integrator as ``nsteps_neq``; when 0, a single
        integrator step is still executed.
    splitting : str, optional
        Splitting string passed to the nonequilibrium integrator.

    Raises
    ------
    Exception
        If the BAR estimate deviates from ``dF_analytical`` by more than
        ``NSIGMA_MAX`` times the BAR uncertainty.
    """
    # max deviation from the calculated free energy, in units of the BAR uncertainty
    NSIGMA_MAX = 6
    n_iterations = 100  # number of forward and reverse protocols

    # These are the alchemical functions that will be used to control the system
    temperature = 298.0 * unit.kelvin
    sigma = 1.0 * unit.angstrom  # stddev of harmonic oscillator
    kT = kB * temperature  # thermal energy
    beta = 1.0 / kT  # inverse thermal energy (not used below)
    K = kT / sigma**2  # spring constant corresponding to sigma
    mass = 39.948 * unit.amu
    period = unit.sqrt(mass/K)  # period of harmonic oscillator
    timestep = period / 20.0
    collision_rate = 1.0 / period
    # Reference value the dimensionless BAR estimate is compared against.
    # NOTE(review): presumably the 1 kT offset applied to U0 below -- confirm.
    dF_analytical = 1.0
    # Each entry maps a global context parameter to its (initial, final) value.
    parameters = dict()
    parameters['testsystems_HarmonicOscillator_x0'] = (0 * sigma, 2 * sigma)
    parameters['testsystems_HarmonicOscillator_U0'] = (0 * kT, 1 * kT)
    # Linear interpolation in lambda from the initial to the final value ...
    forward_functions = { name : '(1-lambda)*%f + lambda*%f' % (value[0].value_in_unit_system(unit.md_unit_system), value[1].value_in_unit_system(unit.md_unit_system)) for (name, value) in parameters.items() }
    # ... and from the final back to the initial value.
    reverse_functions = { name : '(1-lambda)*%f + lambda*%f' % (value[1].value_in_unit_system(unit.md_unit_system), value[0].value_in_unit_system(unit.md_unit_system)) for (name, value) in parameters.items() }

    # Create harmonic oscillator testsystem
    testsystem = testsystems.HarmonicOscillator(K=K, mass=mass)
    system = testsystem.system
    positions = testsystem.positions

    # Get equilibrium samples from initial and final states
    burn_in = 5 * 20  # 5 periods (not used below)
    thinning = 5 * 20  # 5 periods

    # Collect forward and reverse work values
    w_f = np.zeros([n_iterations], np.float64)
    w_r = np.zeros([n_iterations], np.float64)
    platform = openmm.Platform.getPlatformByName("Reference")
    for direction in ['forward', 'reverse']:
        # Each direction restarts from the test system's stored positions.
        positions = testsystem.positions
        for iteration in range(n_iterations):
            # Generate equilibrium sample
            # A fresh integrator and context are built for every sample.
            equilibrium_integrator = GHMCIntegrator(temperature=temperature, collision_rate=collision_rate, timestep=timestep)
            equilibrium_context = openmm.Context(system, equilibrium_integrator, platform)
            # Equilibrate at the end state this direction starts from.
            for (name, value) in parameters.items():
                if direction == 'forward':
                    equilibrium_context.setParameter(name, value[0].value_in_unit_system(unit.md_unit_system))
                else:
                    equilibrium_context.setParameter(name, value[1].value_in_unit_system(unit.md_unit_system))
            equilibrium_context.setPositions(positions)
            equilibrium_integrator.step(thinning)
            # Positions carry over to the next iteration's equilibration.
            positions = equilibrium_context.getState(getPositions=True).getPositions(asNumpy=True)
            del equilibrium_context, equilibrium_integrator

            # Generate nonequilibrium work sample
            if direction == 'forward':
                alchemical_functions = forward_functions
            else:
                alchemical_functions = reverse_functions
            nonequilibrium_integrator = AlchemicalNonequilibriumLangevinIntegrator(temperature=temperature, collision_rate=collision_rate, timestep=timestep, alchemical_functions=alchemical_functions, splitting=splitting, nsteps_neq=nsteps, measure_shadow_work=True)
            nonequilibrium_context = openmm.Context(system, nonequilibrium_integrator, platform)
            nonequilibrium_context.setPositions(positions)
            if nsteps == 0:
                nonequilibrium_integrator.step(1)  # need to execute at least one step
            else:
                nonequilibrium_integrator.step(nsteps)
            if direction == 'forward':
                w_f[iteration] = nonequilibrium_integrator.get_total_work(dimensionless=True)
            else:
                w_r[iteration] = nonequilibrium_integrator.get_total_work(dimensionless=True)
            del nonequilibrium_context, nonequilibrium_integrator

    # Compare the BAR estimate with the reference, in units of its own uncertainty.
    dF, ddF = pymbar.BAR(w_f, w_r)
    nsigma = np.abs(dF - dF_analytical) / ddF
    print("analytical DeltaF: {:12.4f}, DeltaF: {:12.4f}, dDeltaF: {:12.4f}, nsigma: {:12.1f}".format(dF_analytical, dF, ddF, nsigma))
    if nsigma > NSIGMA_MAX:
        # NOTE(review): the message says "not zero", but the comparison above is
        # against dF_analytical (1.0), not zero.
        raise Exception("The free energy difference for the nonequilibrium switching for splitting '%s' and %d steps is not zero within statistical error." % (splitting, nsteps))
def deltaG_from_results(model, results, sys_params) -> Tuple[float, float, List]:
    """Combine per-window simulation results into a total free energy estimate.

    Pairwise BAR estimates between consecutive lambda windows are summed to give
    the returned free energy; an MBAR estimate over the same data is computed and
    printed for comparison only. When ``model.endpoint_correct`` is set, a BAR
    endpoint correction and two standard-state correction terms are added.

    Side effects: prints per-window and summary diagnostics, and saves the
    sanitized energy matrix to ``model.prefix + "_U_kn.npy"``.

    Parameters
    ----------
    model
        Object providing ``unbound_potentials``, ``endpoint_correct``,
        ``lambda_schedule``, ``beta`` and ``prefix``.
    results
        Sequence of per-window results; each item used here exposes ``lambda_us``,
        and the last two expose ``xs`` when endpoint correction is enabled.
    sys_params
        One parameter set per entry of ``model.unbound_potentials``.

    Returns
    -------
    dG : float
        Summed BAR estimate (plus endpoint and standard-state terms if enabled).
    bar_dG_err : float
        Per-window BAR errors (and the endpoint error, if enabled) added in quadrature.
    results
        The ``results`` argument, returned unchanged.
    """
    assert len(sys_params) == len(model.unbound_potentials)

    # Bind each potential to its parameters; only the last one is used below,
    # and only when endpoint correction is enabled.
    bound_potentials = []
    for params, unbound_pot in zip(sys_params, model.unbound_potentials):
        bp = unbound_pot.bind(np.asarray(params))
        bound_potentials.append(bp)

    # With endpoint correction, the final result is excluded from the
    # lambda-window analysis and consumed by the endpoint estimate instead.
    if model.endpoint_correct:
        sim_results = results[:-1]
    else:
        sim_results = results

    U_knk = []
    N_k = []
    for result in sim_results:
        U_knk.append(result.lambda_us)
        N_k.append(len(result.lambda_us))  # number of frames

    U_knk = np.array(U_knk)

    bar_dG = 0
    bar_dG_err = 0  # accumulates squared errors until the sqrt further down

    delta_Us = extract_delta_Us_from_U_knk(U_knk)

    for lambda_idx in range(len(model.lambda_schedule) - 1):

        # BAR operates on reduced energies, hence the multiplication by beta.
        fwd_delta_u = model.beta * delta_Us[lambda_idx][0]
        rev_delta_u = model.beta * delta_Us[lambda_idx][1]

        dG_exact, exact_bar_err = pymbar.BAR(fwd_delta_u, rev_delta_u)
        bar_dG += dG_exact / model.beta
        exact_bar_overlap = endpoint_correction.overlap_from_cdf(fwd_delta_u, rev_delta_u)

        # probably off by a factor of two since we re-use samples.
        bar_dG_err += (exact_bar_err / model.beta)**2

        lamb_start = model.lambda_schedule[lambda_idx]
        lamb_end = model.lambda_schedule[lambda_idx + 1]

        print(
            f"{model.prefix}_BAR: lambda {lamb_start:.3f} -> {lamb_end:.3f} dG: {dG_exact/model.beta:.3f} dG_err: {exact_bar_err/model.beta:.3f} overlap: {exact_bar_overlap:.3f}"
        )

    # for MBAR we need to sanitize the energies
    clean_U_knks = []  # [K, F, K]
    for lambda_idx, full_us in enumerate(U_knk):
        clean_U_knks.append(sanitize_energies(full_us, lambda_idx))

    print(
        model.prefix,
        " MBAR: amin",
        np.amin(clean_U_knks),
        "median",
        np.median(clean_U_knks),
        "max",
        np.amax(clean_U_knks),
    )

    K = len(model.lambda_schedule)
    clean_U_knks = np.array(clean_U_knks)  # [K, F, K]
    U_kn = np.reshape(clean_U_knks, (-1, K)).transpose()  # [K, F*K]
    u_kn = U_kn * model.beta

    # Persist the (non-reduced) sanitized energies to disk.
    np.save(model.prefix + "_U_kn.npy", U_kn)

    mbar = pymbar.MBAR(u_kn, N_k)
    differences, error_estimates = mbar.getFreeEnergyDifferences()
    # Differences relative to the first state; the last entry spans the schedule.
    f_k, error_k = differences[0], error_estimates[0]
    mbar_dG = f_k[-1] / model.beta
    mbar_dG_err = error_k[-1] / model.beta

    # Convert the accumulated squared errors into a standard error.
    bar_dG_err = np.sqrt(bar_dG_err)

    dG = bar_dG  # use the exact answer

    if model.endpoint_correct:
        core_restr = bound_potentials[-1]
        # (ytz): tbd, automatically find optimal k_translation/k_rotation such that
        # standard deviation and/or overlap is maximized
        k_translation = 200.0
        k_rotation = 100.0
        start = time.time()
        # rotation_samples and translation_samples are returned but not used here.
        lhs_du, rhs_du, rotation_samples, translation_samples = endpoint_correction.estimate_delta_us(
            k_translation=k_translation,
            k_rotation=k_rotation,
            core_idxs=core_restr.get_idxs(),
            core_params=core_restr.params.reshape((-1, 2)),
            beta=model.beta,
            lhs_xs=results[-2].xs,
            rhs_xs=results[-1].xs,
            seed=2021,
        )
        dG_endpoint, endpoint_err = pymbar.BAR(model.beta * lhs_du, model.beta * np.array(rhs_du))
        dG_endpoint = dG_endpoint / model.beta
        endpoint_err = endpoint_err / model.beta
        # compute standard state corrections for translation and rotation
        dG_ssc_translation, dG_ssc_rotation = standard_state.release_orientational_restraints(
            k_translation, k_rotation, model.beta)
        overlap = endpoint_correction.overlap_from_cdf(lhs_du, rhs_du)
        lhs_mean = np.mean(lhs_du)
        rhs_mean = np.mean(rhs_du)
        print(
            f"{model.prefix} bar (A) {bar_dG:.3f} bar_err {bar_dG_err:.3f} mbar (A) {mbar_dG:.3f} mbar_err {mbar_dG_err:.3f} dG_endpoint (E) {dG_endpoint:.3f} dG_endpoint_err {endpoint_err:.3f} dG_ssc_translation {dG_ssc_translation:.3f} dG_ssc_rotation {dG_ssc_rotation:.3f} overlap {overlap:.3f} lhs_mean {lhs_mean:.3f} rhs_mean {rhs_mean:.3f} lhs_n {len(lhs_du)} rhs_n {len(rhs_du)} | time: {time.time()-start:.3f}s"
        )
        dG += dG_endpoint + dG_ssc_translation + dG_ssc_rotation
        # Fold the endpoint error into the total, again in quadrature.
        bar_dG_err = np.sqrt(bar_dG_err**2 + endpoint_err**2)
    else:
        print(
            f"{model.prefix} bar (A) {bar_dG:.3f} bar_err {bar_dG_err:.3f} mbar (A) {mbar_dG:.3f} mbar_err {mbar_dG_err:.3f} "
        )

    return dG, bar_dG_err, results
def check_2d(
    traj1,
    traj2,
    param1,
    param2,
    kb,
    pvconvert,
    quantity,
    dtempdpress=False,
    dtempdmu=False,
    cutoff=0.001,
    seed=None,
    bs_error=True,
    bs_repetitions=200,
    verbosity=1,
    screen=False,
    filename=None,
):
    r"""
    Checks whether the energy trajectories of two simulation performed at
    different temperatures have sampled distributions at the analytically
    expected ratio.

    Parameters
    ----------
    traj1 : array-like, 2d
        Trajectory of the first simulation
        If dtempdpress:

            * traj[0,:]: Potential energy U or total energy E = U + K
            * traj[1,:]: Volume V
    traj2 : array-like, 2d
        Trajectory of the second simulation
        If dtempdpress:

            * traj[0,:]: Potential energy U or total energy E = U + K
            * traj[1,:]: Volume V
    param1 : array-like
        If dtempdpress:
            Target temperature and pressure of the first simulation
    param2 : array-like
        If dtempdpress:
            Target temperature and pressure of the second simulation
    kb : float
        Boltzmann constant in same units as the energy trajectories
    pvconvert : float
        Conversion from pressure * volume to energy units
    quantity : List[str]
        Names of quantities analyzed (used for printing only)
    dtempdpress : bool, optional
        Set to True if trajectories were simulated at different
        temperature and pressure
        Default: False.
    dtempdmu : bool, optional
        Set to True if trajectories were simulated at different
        temperature and chemical potential
        Default: False.
    cutoff : float
        Tail cutoff of distributions.
        Default: 0.001 (0.1%)
    seed : int
        If set, bootstrapping will be reproducible.
        Default: None, bootstrapping non-reproducible.
    bs_error : bool
        Calculate the standard error via bootstrap resampling
        Default: True
    bs_repetitions : int
        Number of bootstrap repetitions drawn
        Default: 200
    verbosity : int
        Verbosity level.
        Default: 1 (only most important output)
    screen : bool, optional
        Plot distributions on screen. Not implemented for this check.
        Default: False.
    filename : string, optional
        Plot distributions to `filename`.pdf. Not implemented for this check.
        Default: None.

    Returns
    -------
    np.ndarray
        Absolute deviation of the two fitted slopes from the analytical
        slopes, in units of the estimated error: the error returned by the
        maximum-likelihood fit if ``bs_error`` is False, otherwise the
        standard deviation over the bootstrap fits.

    Raises
    ------
    pv_error.InputError
        If not exactly one of `dtempdpress` / `dtempdmu` is set, or if the
        trajectories do not overlap.
    NotImplementedError
        If `dtempdmu` is set or plotting is requested.
    """

    if not (dtempdpress or dtempdmu) or (dtempdpress and dtempdmu):
        raise pv_error.InputError(
            ["dtempdpress", "dtempdmu"],
            "Need to specify exactly one of `dtempdpress` and `dtempdmu`.",
        )

    if dtempdmu:
        raise NotImplementedError(
            "check_2d: Testing of `dtempdmu` not implemented.")

    if screen or filename is not None:
        raise NotImplementedError("check_2d: Plotting not implemented.")

    # =============================== #
    # prepare constants, strings etc. #
    # =============================== #
    pstring = ("ln(P_2(" + quantity[0] + ", " + quantity[1] + ")/" + "P_1(" +
               quantity[0] + ", " + quantity[1] + "))")
    trueslope = np.zeros(2)
    if dtempdpress:
        trueslope = np.array([
            1 / (kb * param1[0]) - 1 / (kb * param2[0]),
            pvconvert * (1 / (kb * param1[0]) * param1[1] - 1 /
                         (kb * param2[0]) * param2[1]),
        ])

    if verbosity > 1:
        print("Analytical slope of {:s}: {:.8f}, {:.8f}".format(
            pstring, trueslope[0], trueslope[1]))

    quant = {}

    # ==================== #
    # prepare trajectories #
    # ==================== #
    # Discard burn-in period and time-correlated frames
    traj1 = trajectory.prepare(traj1,
                               cut=cutoff,
                               verbosity=verbosity,
                               name="Trajectory 1")
    traj2 = trajectory.prepare(traj2,
                               cut=cutoff,
                               verbosity=verbosity,
                               name="Trajectory 2")

    # calculate overlap
    traj1_full = traj1
    traj2_full = traj2
    traj1, traj2, min_ene, max_ene = trajectory.overlap(
        traj1=traj1_full,
        traj2=traj2_full,
    )
    if verbosity > 0:
        print("Overlap is {:.1%} of trajectory 1 and {:.1%} of trajectory 2.".
              format(
                  traj1.shape[1] / traj1_full.shape[1],
                  traj2.shape[1] / traj2_full.shape[1],
              ))

    if verbosity > 0 and dtempdpress:
        # Standard deviations of the two observables in each full trajectory;
        # the second component is scaled by pvconvert.
        cov1 = np.cov(traj1_full)
        sig1 = np.sqrt(np.diag(cov1))
        sig1[1] *= pvconvert
        cov2 = np.cov(traj2_full)
        sig2 = np.sqrt(np.diag(cov2))
        sig2[1] *= pvconvert
        dt1 = 2 * kb * param1[0] * param1[0] / sig1[0]
        dt2 = 2 * kb * param2[0] * param2[0] / sig2[0]
        dp1 = 2 * kb * param1[0] / sig1[1]
        dp2 = 2 * kb * param2[0] / sig2[1]
        if verbosity > 1:
            # The message reports (dT, dP) for point 1 and then (dT, dP) for
            # point 2, so the arguments must be ordered dt1, dp1, dt2, dp2.
            print(
                "A rule of thumb states that a good overlap can be expected when choosing state\n"
                "points separated by about 2 standard deviations.\n"
                "For the current trajectories, dT = {:.1f}, and dP = {:.1f},\n"
                "with standard deviations sig1 = [{:.1f}, {:.1g}], and sig2 = [{:.1f}, {:.1g}].\n"
                "According to the rule of thumb, given point 1, the estimate is dT = {:.1f}, dP = {:.1f}, and\n"
                " given point 2, the estimate is dT = {:.1f}, dP = {:.1f}."
                .format(
                    param2[0] - param1[0],
                    param2[1] - param1[1],
                    sig1[0],
                    sig1[1],
                    sig2[0],
                    sig2[1],
                    dt1,
                    dp1,
                    dt2,
                    dp2,
                ))
        print(
            "Rule of thumb estimates that (dT,dP) = ({:.1f},{:.1f}) would be optimal "
            "(currently, (dT,dP) = ({:.1f},{:.1f}))".format(
                0.5 * (dt1 + dt2),
                0.5 * (dp1 + dp2),
                param2[0] - param1[0],
                param2[1] - param1[1],
            ))

    if min_ene is None:
        raise pv_error.InputError(["traj1", "traj2"],
                                  "No overlap between trajectories.")

    # calculate inefficiency
    g1 = np.array([
        pymbar.timeseries.statisticalInefficiency(traj1[0]),
        pymbar.timeseries.statisticalInefficiency(traj1[1]),
    ])
    g2 = np.array([
        pymbar.timeseries.statisticalInefficiency(traj2[0]),
        pymbar.timeseries.statisticalInefficiency(traj2[1]),
    ])

    # Work values for BAR, built from the analytical slopes; the BAR estimate
    # seeds the first parameter of the maximum-likelihood fits below.
    w_f = -trueslope[0] * traj1[0] - trueslope[1] * traj1[1]
    w_r = trueslope[0] * traj2[0] + trueslope[1] * traj2[1]

    if verbosity > 2:
        print("Computing log of partition functions using pymbar.BAR...")
    df, ddf = pymbar.BAR(w_f, w_r)
    if verbosity > 2:
        print(
            "Using {:.5f} for log of partition functions as computed from BAR."
            .format(df))
        print("Uncertainty in quantity is {:.5f}.".format(ddf))
        print(
            "Assuming this is negligible compared to sampling error at individual points."
        )

    # ================== #
    # max-likelihood fit #
    # ================== #
    if verbosity > 2:
        print("Computing the maximum likelihood parameters")
    fitvals, dfitvals = do_max_likelihood_fit(
        traj1,
        traj2,
        g1,
        g2,
        init_params=[df, trueslope[0], trueslope[1]],
        verbose=(verbosity > 1),
    )

    # The first fitted value is the offset; the remaining two are the slopes.
    slope = fitvals[1:]
    dslope = dfitvals[1:]
    quant["maxLikelihood"] = np.abs((slope - trueslope) / dslope)
    if verbosity > 0:
        print_stats(
            title="Maximum Likelihood Analysis (analytical error)",
            fitvals=fitvals,
            dfitvals=dfitvals,
            kb=kb,
            param1=param1,
            param2=param2,
            trueslope=trueslope,
            pvconvert=pvconvert,
            dtempdpress=dtempdpress,
            dtempdmu=dtempdmu,
        )

    if not bs_error:
        return quant["maxLikelihood"]

    # =============================== #
    # bootstrapped max-likelihood fit #
    # =============================== #
    if verbosity > 2:
        print("Computing bootstrapped maximum likelihood parameters")
    if seed is not None:
        np.random.seed(seed)
    bs_fitvals = []
    for t1, t2 in zip(
            trajectory.bootstrap(traj1, bs_repetitions),
            trajectory.bootstrap(traj2, bs_repetitions),
    ):
        # use overlap region
        t1, t2, min_ene, max_ene = trajectory.overlap(traj1=t1, traj2=t2)
        # calculate inefficiency
        g1 = np.array([
            pymbar.timeseries.statisticalInefficiency(t1[0]),
            pymbar.timeseries.statisticalInefficiency(t1[1]),
        ])
        g2 = np.array([
            pymbar.timeseries.statisticalInefficiency(t2[0]),
            pymbar.timeseries.statisticalInefficiency(t2[1]),
        ])
        # calculate max_likelihood fit
        fv, _ = do_max_likelihood_fit(
            t1,
            t2,
            g1,
            g2,
            init_params=[df, trueslope[0], trueslope[1]],
            verbose=(verbosity > 2),
        )
        bs_fitvals.append(fv)

    bs_fitvals = np.array(bs_fitvals)
    # The slope stays the one from the fit on the full data; only its error
    # estimate is replaced by the spread of the bootstrap fits.
    dslope = np.std(bs_fitvals[:, 1:], axis=0)
    quant["bootstrap"] = np.abs((slope - trueslope) / dslope)
    if verbosity > 0:
        print_stats(
            title="Maximum Likelihood Analysis (bootstrapped error)",
            fitvals=np.concatenate(([fitvals], bs_fitvals)),
            dfitvals=None,
            kb=kb,
            param1=param1,
            param2=param2,
            trueslope=trueslope,
            pvconvert=pvconvert,
            dtempdpress=dtempdpress,
            dtempdmu=dtempdmu,
        )

    return quant["bootstrap"]
def test_periodic_langevin_integrator(splitting="H V R O R V H", ncycles=40, nsteps_neq=1000, nsteps_eq=1000, write_trajectory=False):
    """
    Test PeriodicNonequilibriumIntegrator

    The test first steps through two full cycles one step at a time, checking the
    integrator's ``step`` and ``lambda`` global variables, then collects forward and
    reverse protocol work over ``ncycles`` cycles and compares the BAR estimate with
    the reference free energy difference.

    Parameters
    ----------
    splitting : str, optional, default="H V R O R V H"
        Splitting string passed to the integrator
    ncycles : int, optional, default=40
        number of cycles
    nsteps_neq : int, optional, default=1000
        number of forward/backward annealing steps
    nsteps_eq : int, optional, default=1000
        number of equilibration steps to run at endstates before annealing
    write_trajectory : bool, optional, default=False
        If True, will generate a PDB file that contains the harmonic oscillator trajectory

    Raises
    ------
    Exception
        If the BAR estimate deviates from ``dF_analytical`` by more than
        ``NSIGMA_MAX`` times the BAR uncertainty.
    """
    # max deviation from the calculated free energy, in units of the BAR uncertainty
    NSIGMA_MAX = 6

    # These are the alchemical functions that will be used to control the system
    temperature = 298.0 * unit.kelvin
    sigma = 1.0 * unit.angstrom  # stddev of harmonic oscillator
    kT = kB * temperature  # thermal energy
    beta = 1.0 / kT  # inverse thermal energy (not used below)
    K = kT / sigma**2  # spring constant corresponding to sigma
    mass = 39.948 * unit.amu
    period = unit.sqrt(mass / K)  # period of harmonic oscillator
    timestep = period / 20.0
    collision_rate = 1.0 / period
    # Reference value the dimensionless BAR estimate is compared against.
    # NOTE(review): presumably the 5 kT offset applied to U0 below -- confirm.
    dF_analytical = 5.0
    # Each entry maps a global context parameter to its (initial, final) value.
    parameters = dict()
    displacement = 10 * sigma
    parameters['testsystems_HarmonicOscillator_x0'] = (0 * sigma, displacement)
    parameters['testsystems_HarmonicOscillator_U0'] = (0 * kT, 5 * kT)
    integrator_kwargs = {
        'temperature': temperature,
        'collision_rate': collision_rate,
        'timestep': timestep,
        'measure_shadow_work': False,
        'measure_heat': False
    }
    # Linear interpolation in lambda from the initial to the final value.
    alchemical_functions = {
        name: '(1-lambda)*%f + lambda*%f' %
        (value[0].value_in_unit_system(unit.md_unit_system),
         value[1].value_in_unit_system(unit.md_unit_system))
        for (name, value) in parameters.items()
    }

    # Create harmonic oscillator testsystem
    testsystem = testsystems.HarmonicOscillator(K=K, mass=mass)
    system = testsystem.system
    positions = testsystem.positions
    topology = testsystem.topology

    # Create integrator
    from openmmtools.integrators import PeriodicNonequilibriumIntegrator
    integrator = PeriodicNonequilibriumIntegrator(
        alchemical_functions=alchemical_functions,
        splitting=splitting,
        nsteps_eq=nsteps_eq,
        nsteps_neq=nsteps_neq,
        **integrator_kwargs)
    platform = openmm.Platform.getPlatformByName("Reference")
    context = openmm.Context(system, integrator, platform)
    context.setPositions(positions)

    # One cycle = eq at lambda 0, anneal 0 -> 1, eq at lambda 1, anneal 1 -> 0.
    nsteps_per_cycle = nsteps_eq + nsteps_neq + nsteps_eq + nsteps_neq
    assert integrator.getGlobalVariableByName(
        "n_steps_per_cycle") == nsteps_per_cycle

    if write_trajectory:
        from simtk.openmm.app import PDBFile
        filename = 'neq-trajectory.pdb'
        print(f'Writing trajectory to {filename}')
        with open(filename, 'wt') as outfile:
            # Write reference
            import copy
            pos1 = copy.deepcopy(positions)
            pos2 = copy.deepcopy(positions)
            pos2[0, 0] += displacement
            PDBFile.writeModel(topology, pos1, outfile)
            PDBFile.writeModel(topology, pos2, outfile)

            # Write one frame every `interval` steps over two full cycles.
            interval = 10
            PDBFile.writeModel(topology, positions, outfile, modelIndex=0)
            for step in range(0, 2 * nsteps_per_cycle, interval):
                integrator.step(interval)
                positions = context.getState(getPositions=True).getPositions(
                    asNumpy=True)
                PDBFile.writeModel(topology,
                                   positions,
                                   outfile,
                                   modelIndex=step)

            PDBFile.writeModel(topology, pos1, outfile)
            PDBFile.writeModel(topology, pos2, outfile)

        # Reset the integrator
        integrator.reset()

    # Step through two cycles one step at a time, checking that the integrator's
    # step counter wraps at the cycle length and that lambda follows the schedule.
    step = 0
    for cycle in range(2):
        # eq (0)
        for i in range(nsteps_eq):
            integrator.step(1)
            step += 1
            assert integrator.getGlobalVariableByName("step") == (
                step % nsteps_per_cycle)
            assert np.isclose(integrator.getGlobalVariableByName("lambda"),
                              0.0)
        # neq (0 -> 1)
        for i in range(nsteps_neq):
            integrator.step(1)
            step += 1
            assert integrator.getGlobalVariableByName("step") == (
                step % nsteps_per_cycle)
            assert np.isclose(
                integrator.getGlobalVariableByName("lambda"),
                (i + 1) / nsteps_neq
            ), f'{step} {integrator.getGlobalVariableByName("lambda")}'
        # eq (1)
        for i in range(nsteps_eq):
            integrator.step(1)
            step += 1
            assert integrator.getGlobalVariableByName("step") == (
                step % nsteps_per_cycle)
            assert np.isclose(integrator.getGlobalVariableByName("lambda"),
                              1.0)
        # neq (1 -> 0)
        for i in range(nsteps_neq):
            integrator.step(1)
            step += 1
            assert integrator.getGlobalVariableByName("step") == (
                step % nsteps_per_cycle)
            assert np.isclose(integrator.getGlobalVariableByName("lambda"),
                              1 - (i + 1) / nsteps_neq)

        # A completed cycle must end back at lambda = 0.
        assert np.isclose(integrator.getGlobalVariableByName("lambda"), 0.0)

    # Reset the integrator
    integrator.reset()

    # Collect one forward and one reverse work value per cycle; each is the
    # change in accumulated protocol work across the annealing segment.
    forward_works, reverse_works = list(), list()
    for _ in range(ncycles):
        # Equilibrium (lambda = 0)
        integrator.step(nsteps_eq)
        # Forward (0 -> 1)
        initial_work = integrator.get_protocol_work(dimensionless=True)
        integrator.step(nsteps_neq)
        final_work = integrator.get_protocol_work(dimensionless=True)
        forward_work = final_work - initial_work
        forward_works.append(forward_work)
        # Equilibrium (lambda = 1)
        integrator.step(nsteps_eq)
        # Reverse work (1 -> 0)
        initial_work = integrator.get_protocol_work(dimensionless=True)
        integrator.step(nsteps_neq)
        final_work = integrator.get_protocol_work(dimensionless=True)
        reverse_work = final_work - initial_work
        reverse_works.append(reverse_work)

    print(np.array(forward_works).std())
    print(np.array(reverse_works).std())

    # Compare the BAR estimate with the reference, in units of its own uncertainty.
    dF, ddF = pymbar.BAR(np.array(forward_works), np.array(reverse_works))
    nsigma = np.abs(dF - dF_analytical) / ddF
    assert np.isclose(integrator.getGlobalVariableByName("lambda"), 0.0)
    print(
        "analytical DeltaF: {:12.4f}, DeltaF: {:12.4f}, dDeltaF: {:12.4f}, nsigma: {:12.1f}"
        .format(dF_analytical, dF, ddF, nsigma))
    if nsigma > NSIGMA_MAX:
        # NOTE(review): the message says "not zero", but the comparison above is
        # against dF_analytical (5.0), not zero.
        raise Exception(
            f"The free energy difference for the nonequilibrium switching for splitting {splitting} is not zero within statistical error."
        )

    # Clean up
    del context
    del integrator
def run_alchemical_langevin_integrator(nsteps=0, splitting="O { V R H R V } O"):
    """Check that AlchemicalNonequilibriumLangevinIntegrator reproduces the analytical free energy
    difference for a harmonic oscillator deformation, using BAR.

    Up to 6*sigma is tolerated for error.
    The total work (protocol work + shadow work) is used.

    For each direction a single context is created around a compound integrator that
    switches between an equilibrium (GHMC) integrator and the nonequilibrium integrator.

    Parameters
    ----------
    nsteps : int, optional, default=0
        Passed to the nonequilibrium integrator as ``nsteps_neq``; at least one
        integrator step is always executed.
    splitting : str, optional
        Splitting string passed to the nonequilibrium integrator.

    Raises
    ------
    Exception
        If the BAR estimate deviates from ``dF_analytical`` by more than
        ``NSIGMA_MAX`` times the BAR uncertainty.
    """
    # max deviation from the calculated free energy, in units of the BAR uncertainty
    NSIGMA_MAX = 6
    n_iterations = 200  # number of forward and reverse protocols

    # These are the alchemical functions that will be used to control the system
    temperature = 298.0 * unit.kelvin
    sigma = 1.0 * unit.angstrom  # stddev of harmonic oscillator
    kT = kB * temperature  # thermal energy
    beta = 1.0 / kT  # inverse thermal energy (not used below)
    K = kT / sigma**2  # spring constant corresponding to sigma
    mass = 39.948 * unit.amu
    period = unit.sqrt(mass / K)  # period of harmonic oscillator
    timestep = period / 20.0
    collision_rate = 1.0 / period
    # Reference value the dimensionless BAR estimate is compared against.
    # NOTE(review): presumably the 1 kT offset applied to U0 below -- confirm.
    dF_analytical = 1.0
    # Each entry maps a global context parameter to its (initial, final) value.
    parameters = dict()
    parameters['testsystems_HarmonicOscillator_x0'] = (0 * sigma, 2 * sigma)
    parameters['testsystems_HarmonicOscillator_U0'] = (0 * kT, 1 * kT)
    # Per-direction linear interpolation in lambda: 'forward' runs from the
    # initial to the final value, 'reverse' from the final back to the initial.
    alchemical_functions = {
        'forward': {
            name: '(1-lambda)*%f + lambda*%f' %
            (value[0].value_in_unit_system(unit.md_unit_system),
             value[1].value_in_unit_system(unit.md_unit_system))
            for (name, value) in parameters.items()
        },
        'reverse': {
            name: '(1-lambda)*%f + lambda*%f' %
            (value[1].value_in_unit_system(unit.md_unit_system),
             value[0].value_in_unit_system(unit.md_unit_system))
            for (name, value) in parameters.items()
        },
    }

    # Create harmonic oscillator testsystem
    testsystem = testsystems.HarmonicOscillator(K=K, mass=mass)
    system = testsystem.system
    positions = testsystem.positions

    # Get equilibrium samples from initial and final states
    burn_in = 5 * 20  # 5 periods (not used below)
    thinning = 5 * 20  # 5 periods

    # Collect forward and reverse work values
    directions = ['forward', 'reverse']
    work = {
        direction: np.zeros([n_iterations], np.float64)
        for direction in directions
    }
    platform = openmm.Platform.getPlatformByName("Reference")
    for direction in directions:
        positions = testsystem.positions

        # Create equilibrium and nonequilibrium integrators
        equilibrium_integrator = GHMCIntegrator(temperature=temperature,
                                                collision_rate=collision_rate,
                                                timestep=timestep)
        nonequilibrium_integrator = AlchemicalNonequilibriumLangevinIntegrator(
            temperature=temperature,
            collision_rate=collision_rate,
            timestep=timestep,
            alchemical_functions=alchemical_functions[direction],
            splitting=splitting,
            nsteps_neq=nsteps,
            measure_shadow_work=True)

        # Create compound integrator
        # Index 0 is the equilibrium integrator, index 1 the nonequilibrium one.
        compound_integrator = openmm.CompoundIntegrator()
        compound_integrator.addIntegrator(equilibrium_integrator)
        compound_integrator.addIntegrator(nonequilibrium_integrator)

        # Create Context
        context = openmm.Context(system, compound_integrator, platform)
        context.setPositions(positions)

        # Collect work samples
        for iteration in range(n_iterations):
            #
            # Generate equilibrium sample
            #
            compound_integrator.setCurrentIntegrator(0)
            equilibrium_integrator.reset()
            compound_integrator.step(thinning)

            #
            # Generate nonequilibrium work sample
            #
            compound_integrator.setCurrentIntegrator(1)
            nonequilibrium_integrator.reset()

            # Check initial conditions after reset
            current_lambda = nonequilibrium_integrator.getGlobalVariableByName(
                'lambda')
            assert current_lambda == 0.0, 'initial lambda should be 0.0 (was %f)' % current_lambda
            current_step = nonequilibrium_integrator.getGlobalVariableByName(
                'step')
            assert current_step == 0.0, 'initial step should be 0 (was %f)' % current_step

            compound_integrator.step(max(
                1, nsteps))  # need to execute at least one step
            work[direction][
                iteration] = nonequilibrium_integrator.get_total_work(
                    dimensionless=True)

            # Check final conditions before reset
            current_lambda = nonequilibrium_integrator.getGlobalVariableByName(
                'lambda')
            assert current_lambda == 1.0, 'final lambda should be 1.0 (was %f) for splitting %s' % (
                current_lambda, splitting)
            current_step = nonequilibrium_integrator.getGlobalVariableByName(
                'step')
            assert int(current_step) == max(
                1, nsteps
            ), 'final step should be %d (was %f) for splitting %s' % (max(
                1, nsteps), current_step, splitting)
            nonequilibrium_integrator.reset()

        # Clean up
        del context
        del compound_integrator

    # Compare the BAR estimate with the reference, in units of its own uncertainty.
    dF, ddF = pymbar.BAR(work['forward'], work['reverse'])
    nsigma = np.abs(dF - dF_analytical) / ddF
    print(
        "analytical DeltaF: {:12.4f}, DeltaF: {:12.4f}, dDeltaF: {:12.4f}, nsigma: {:12.1f}"
        .format(dF_analytical, dF, ddF, nsigma))
    if nsigma > NSIGMA_MAX:
        # NOTE(review): the message says "not zero", but the comparison above is
        # against dF_analytical (1.0), not zero.
        raise Exception(
            "The free energy difference for the nonequilibrium switching for splitting '%s' and %d steps is not zero within statistical error."
            % (splitting, nsteps))
def current_free_energy_estimate(self):
    """Return the BAR estimate from the accumulated forward and reverse total work.

    Returns
    -------
    list
        ``[df, ddf]``: the two values returned by ``pymbar.BAR`` for
        ``self._forward_total_work`` and ``self._reverse_total_work``.
    """
    estimate, uncertainty = pymbar.BAR(
        self._forward_total_work,
        self._reverse_total_work,
    )
    return [estimate, uncertainty]
def check_1d(
    traj1,
    traj2,
    param1,
    param2,
    kb,
    quantity,
    dtemp=False,
    dpress=False,
    dmu=False,
    temp=None,
    pvconvert=None,
    nbins=40,
    cutoff=0.001,
    seed=None,
    bs_error=True,
    bs_repetitions=200,
    verbosity=1,
    screen=False,
    filename=None,
    xlabel="Energy",
    xunit=None,
):
    r"""
    Check whether the trajectories of two simulations performed at different
    temperatures (or pressures) have sampled distributions at the
    analytically expected ratio.

    The slope of ln(P_2/P_1) is estimated by a linear fit and by a maximum
    likelihood fit and compared to the analytical slope. The deviation is
    reported in units of the estimated error of the fitted slope.

    Parameters
    ----------
    traj1 : array-like
        Trajectory of the first simulation
        If dtemp:

        * NVT: Potential energy U or total energy E = U + K
        * NPT: Enthalpy H = U + pV or total energy E = H + K

        If dpress:

        * NPT: Volume V

    traj2 : array-like
        Trajectory of the second simulation
        If dtemp:

        * NVT: Potential energy U or total energy E = U + K
        * NPT: Enthalpy H = U + pV or total energy E = H + K

        If dpress:

        * NPT: Volume V

    param1 : float
        Target temperature or pressure of the first simulation
    param2 : float
        Target temperature or pressure of the second simulation
    kb : float
        Boltzmann constant in same units as the energy trajectories
    quantity : str
        Name of quantity analyzed (used for printing only)
    dtemp : bool, optional
        Set to True if trajectories were simulated at different temperature
        Default: False.
    dpress : bool, optional
        Set to True if trajectories were simulated at different pressure
        Default: False.
    dmu : bool, optional
        Set to True if trajectories were simulated at different chemical potential
        (not implemented, raises NotImplementedError)
        Default: False.
    temp : float, optional
        The temperature in equal temperature, differing pressure NPT simulations.
        Required if `dpress` is True.
    pvconvert : float, optional
        Conversion from pressure * volume to energy units.
        Required if `dpress` is True.
    nbins : int, optional
        Number of bins used to assess distributions of the trajectories
        Default: 40
    cutoff : float, optional
        Tail cutoff of distributions.
        Default: 0.001 (0.1%)
    seed : int, optional
        If set, bootstrapping will be reproducible.
        Default: None, bootstrapping non-reproducible.
    bs_error : bool
        Calculate the standard error via bootstrap resampling
        Default: True
    bs_repetitions : int
        Number of bootstrap repetitions drawn
        Default: 200
    verbosity : int, optional
        Verbosity level.
        Default: 1 (only most important output)
    screen : bool, optional
        Plot distributions on screen.
        Default: False.
    filename : string, optional
        Plot distributions to `filename`.pdf.
        Default: None.
    xlabel : string, optional
        x-axis label used for plotting
        Default: 'Energy'
    xunit : string, optional
        x-axis label unit used for plotting
        Default: None

    Returns
    -------
    list of float
        One-element list holding ``abs((slope - trueslope) / dslope)``, where
        `slope` is the maximum likelihood slope. If `bs_error` is False,
        `dslope` is the analytical error of the maximum likelihood fit;
        otherwise it is the standard deviation of the bootstrapped slopes.

    Raises
    ------
    pv_error.InputError
        If not exactly one of `dtemp`, `dpress`, `dmu` is set, if `dpress`
        is set without `temp` and `pvconvert`, if the trajectories do not
        overlap, or if fewer than 3 bins remain in the overlap region.
    NotImplementedError
        If `dmu` is set.
    """

    # Exactly one of the three mode flags must be set.
    if (not (dtemp or dpress or dmu) or (dtemp and dpress) or (dtemp and dmu)
            or (dpress and dmu)):
        raise pv_error.InputError(
            ["dtemp", "dpress", "dmu"],
            "Need to specify exactly one of `dtemp`, `dpress` and `dmu`.",
        )

    if dmu:
        raise NotImplementedError(
            "check_1d: Testing of `dmu` not implemented.")

    if dpress and (temp is None or pvconvert is None):
        raise pv_error.InputError(
            ["dpress", "temp", "pvconvert"],
            "`ensemble.check_1d` with `dpress=True` requires `temp` and `pvconvert`.",
        )

    # =============================== #
    # prepare constants, strings etc. #
    # =============================== #
    pstring = "ln(P_2(" + quantity + ")/P_1(" + quantity + "))"
    trueslope = 0
    if dtemp:
        # beta_1 - beta_2, with beta = 1 / (kb * T)
        trueslope = 1 / (kb * param1) - 1 / (kb * param2)
    elif dpress:
        # (P_1 - P_2) / (kb * T), converted to energy units via pvconvert
        trueslope = (param1 - param2) / (kb * temp) * pvconvert

    if verbosity > 1:
        print("Analytical slope of {:s}: {:.8f}".format(pstring, trueslope))

    # Collects the deviation found by each analysis, keyed by method name.
    # Note that only "maxLikelihood" or "bootstrap" is ever returned;
    # the "linear" entry is computed but not handed back to the caller.
    quant = {}

    # ==================== #
    # prepare trajectories #
    # ==================== #
    # Discard burn-in period and time-correlated frames
    traj1 = trajectory.prepare(traj1,
                               cut=cutoff,
                               verbosity=verbosity,
                               name="Trajectory 1")
    traj2 = trajectory.prepare(traj2,
                               cut=cutoff,
                               verbosity=verbosity,
                               name="Trajectory 2")

    # calculate overlap
    # Keep references to the prepared (pre-overlap) trajectories; they are
    # needed for the overlap percentages and the rule-of-thumb estimates.
    traj1_full = traj1
    traj2_full = traj2
    traj1, traj2, min_ene, max_ene = trajectory.overlap(traj1=traj1_full,
                                                        traj2=traj2_full)
    if verbosity > 0:
        print("Overlap is {:.1%} of trajectory 1 and {:.1%} of trajectory 2.".
              format(
                  traj1.shape[0] / traj1_full.shape[0],
                  traj2.shape[0] / traj2_full.shape[0],
              ))
    if verbosity > 0 and dtemp:
        # Rule-of-thumb temperature spacing, from the spread of each
        # prepared (pre-overlap) trajectory.
        sig1 = np.std(traj1_full)
        sig2 = np.std(traj2_full)
        dt1 = 2 * kb * param1 * param1 / sig1
        dt2 = 2 * kb * param2 * param2 / sig2
        if verbosity > 1:
            print(
                "A rule of thumb states that a good overlap is found when dT/T = (2*kB*T)/(sig),\n"
                "where sig is the standard deviation of the energy distribution.\n"
                "For the current trajectories, dT = {:.1f}, sig1 = {:.1f} and sig2 = {:.1f}.\n"
                "According to the rule of thumb, given T1, a good dT is dT = {:.1f}, and\n"
                " given T2, a good dT is dT = {:.1f}."
                .format(param2 - param1, sig1, sig2, dt1, dt2))
        print("Rule of thumb estimates that dT = {:.1f} would be optimal "
              "(currently, dT = {:.1f})".format(0.5 * (dt1 + dt2),
                                                param2 - param1))
    if verbosity > 0 and dpress:
        # Rule-of-thumb pressure spacing; the volume spread is converted
        # to energy units with pvconvert first.
        sig1 = np.std(traj1_full) * pvconvert
        sig2 = np.std(traj2_full) * pvconvert
        dp1 = 2 * kb * temp / sig1
        dp2 = 2 * kb * temp / sig2
        if verbosity > 1:
            print(
                "A rule of thumb states that a good overlap is found when dP = (2*kB*T)/(sig),\n"
                "where sig is the standard deviation of the volume distribution.\n"
                "For the current trajectories, dP = {:.1f}, sig1 = {:.1g} and sig2 = {:.1g}.\n"
                "According to the rule of thumb, given P1, a good dP is dP = {:.1f}, and\n"
                " given P2, a good dP is dP = {:.1f}."
                .format(param2 - param1, sig1, sig2, dp1, dp2))
        print("Rule of thumb estimates that dP = {:.1f} would be optimal "
              "(currently, dP = {:.1f})".format(0.5 * (dp1 + dp2),
                                                param2 - param1))
    # NOTE(review): `not min_ene` is also true when the lower overlap bound
    # is exactly 0.0 -- presumably trajectory.overlap signals "no overlap"
    # with None; confirm, and consider an explicit `is None` test.
    if not min_ene:
        raise pv_error.InputError(["traj1", "traj2"],
                                  "No overlap between trajectories.")
    # calculate bins
    bins = np.linspace(min_ene, max_ene, nbins + 1)
    bins = check_bins(traj1, traj2, bins)
    if np.size(bins) < 3:
        raise pv_error.InputError(
            ["traj1", "traj2", "nbins", "cutoff"],
            "Less than 3 bins were filled in the overlap region.\n"
            "Ensure sufficient overlap between the trajectories, and "
            "consider increasing `cutoff` or `nbins` if there is "
            "sufficient overlap but unusually long tails.",
        )

    # calculate inefficiency
    g1 = pymbar.timeseries.statisticalInefficiency(traj1)
    g2 = pymbar.timeseries.statisticalInefficiency(traj2)

    # Work-like quantities fed to BAR, built from the analytical slope.
    # The resulting `df` serves as the analytical offset in the linear fit
    # and as the initial offset of the maximum likelihood fits below.
    w_f = -trueslope * traj1
    w_r = trueslope * traj2

    if verbosity > 2:
        print("Computing log of partition functions using pymbar.BAR...")
    df, ddf = pymbar.BAR(w_f, w_r)
    if verbosity > 2:
        print(
            "Using {:.5f} for log of partition functions as computed from BAR."
            .format(df))
        print("Uncertainty in quantity is {:.5f}.".format(ddf))
        print(
            "Assuming this is negligible compared to sampling error at individual points."
        )

    # ========== #
    # linear fit #
    # ========== #
    if verbosity > 2:
        print("Computing linear fit parameters (for plotting / comparison)")

    fitvals, dfitvals = do_linear_fit(
        traj1=traj1,
        traj2=traj2,
        g1=g1,
        g2=g2,
        bins=bins,
        screen=screen,
        filename=filename,
        trueslope=trueslope,
        trueoffset=df,
        units=xunit,
        xlabel=xlabel,
        ylabel=r"$\log\frac{P_2(" + quantity + ")}{P_1(" + quantity + ")}$",
    )

    # Index 1 of the fit results is the slope (index 0 is used for the
    # offset, matching init_params=[df, trueslope] further down).
    slope = fitvals[1]
    dslope = dfitvals[1]
    quant["linear"] = [abs((slope - trueslope) / dslope)]

    if verbosity > 1:
        print_stats(
            title="Linear Fit Analysis (analytical error)",
            fitvals=fitvals,
            dfitvals=dfitvals,
            kb=kb,
            param1=param1,
            param2=param2,
            trueslope=trueslope,
            temp=temp,
            pvconvert=pvconvert,
            dtemp=dtemp,
            dpress=dpress,
            dmu=dmu,
        )

    # ================== #
    # max-likelihood fit #
    # ================== #
    if verbosity > 2:
        print("Computing the maximum likelihood parameters")

    fitvals, dfitvals = do_max_likelihood_fit(traj1,
                                              traj2,
                                              g1,
                                              g2,
                                              init_params=[df, trueslope],
                                              verbose=(verbosity > 1))

    slope = fitvals[1]
    dslope = dfitvals[1]
    quant["maxLikelihood"] = [abs((slope - trueslope) / dslope)]

    # Print the analytical-error statistics at verbosity 1 only when they
    # are the final result (no bootstrapping); always print at verbosity > 1.
    if (verbosity > 0 and not bs_error) or verbosity > 1:
        print_stats(
            title="Maximum Likelihood Analysis (analytical error)",
            fitvals=fitvals,
            dfitvals=dfitvals,
            kb=kb,
            param1=param1,
            param2=param2,
            trueslope=trueslope,
            temp=temp,
            pvconvert=pvconvert,
            dtemp=dtemp,
            dpress=dpress,
            dmu=dmu,
        )

    if not bs_error:
        return quant["maxLikelihood"]

    # =============================== #
    # bootstrapped max-likelihood fit #
    # =============================== #
    if verbosity > 0:
        print(
            "Computing bootstrapped maximum likelihood parameters... "
            "[0/{:d}]".format(bs_repetitions),
            end="",
        )
    # Seeds NumPy's global random state, so this also affects any later
    # np.random use outside this function.
    if seed is not None:
        np.random.seed(seed)
    bs_fitvals = []
    for n, (t1, t2) in enumerate(
            zip(
                trajectory.bootstrap(traj1, bs_repetitions),
                trajectory.bootstrap(traj2, bs_repetitions),
            )):
        # use overlap region
        # (min_ene / max_ene are overwritten here but not used afterwards)
        t1, t2, min_ene, max_ene = trajectory.overlap(traj1=t1, traj2=t2)
        # calculate inefficiency
        g1 = pymbar.timeseries.statisticalInefficiency(t1)
        g2 = pymbar.timeseries.statisticalInefficiency(t2)
        # calculate max_likelihood fit
        fv, _ = do_max_likelihood_fit(t1,
                                      t2,
                                      g1,
                                      g2,
                                      init_params=[df, trueslope],
                                      verbose=(verbosity > 2))
        bs_fitvals.append(fv)
        # print progress
        if verbosity > 0:
            print(
                "\rComputing bootstrapped maximum likelihood parameters... "
                "[{:d}/{:d}]".format(n + 1, bs_repetitions),
                end="",
            )

    # Terminate the "\r"-updated progress line.
    print()
    bs_fitvals = np.array(bs_fitvals)
    # `slope` still holds the maximum likelihood estimate obtained from the
    # full trajectories; only the error estimate is replaced by the spread
    # of the bootstrapped slopes (the averaging variant below is disabled).
    # slope = np.average(fitvals[:, 1])
    dslope = np.std(bs_fitvals[:, 1], axis=0)
    quant["bootstrap"] = [abs((slope - trueslope) / dslope)]

    if verbosity > 0:
        print_stats(
            title="Maximum Likelihood Analysis (bootstrapped error)",
            fitvals=np.concatenate(([fitvals], bs_fitvals)),
            dfitvals=None,
            kb=kb,
            param1=param1,
            param2=param2,
            trueslope=trueslope,
            temp=temp,
            pvconvert=pvconvert,
            dtemp=dtemp,
            dpress=dpress,
            dmu=dmu,
        )

    return quant["bootstrap"]
def bayes_factor_v2(model_ini, sample_ini, model_fin, sample_fin,
                    model_ini_name="2c", model_fin_name="rm",
                    aug_with="GaussMix", sigma_robust=False,
                    n_components=1, covariance_type="full",
                    bootstrap=None, sample_proportion=None):
    """
    Estimate the log Bayes factor of the final model over the initial model with BAR.

    The initial model has fewer parameters than the final one, so its samples are
    augmented with the "redundant" parameters of the final model, drawn from a
    distribution (Normal, Uniform or Gaussian mixture) fitted to the final samples.
    Forward and reverse "work" values are then the potential-energy differences
    between the two states, and the Bayes factor is minus the BAR free energy
    difference (natural-log scale; the printout converts to log10).

    :param model_ini: pymc3 model
    :param sample_ini: dict: var_name -> ndarray
    :param model_fin: pymc3 model
    :param sample_fin: dict: var_name -> ndarray
    :param model_ini_name: str
    :param model_fin_name: str
    :param aug_with: str, one of "Normal", "Uniform", "GaussMix"
    :param sigma_robust: bool, only used when aug_with="Normal"
    :param n_components: int, only used when aug_with="GaussMix"
    :param covariance_type: str, only used when aug_with="GaussMix"
    :param bootstrap: int or None, number of bootstrap repeats used to estimate the error
    :param sample_proportion: float in [0, 1] or None. If given, both the estimate
                              and its error come from bootstrap_BAR run on this
                              proportion of the work values; requires bootstrap
    :return: bf if bootstrap is None
             (bf, err) if bootstrap is an int
             0. (or (0., 0.)) if a work array is empty after removing nan/inf
    :raises ValueError: if sample_proportion is given but bootstrap is None
    """
    print("Use estimator version 2")
    if (sample_proportion is not None) and bootstrap is None:
        raise ValueError("When sample_proportion = %0.5f, bootstrap must not be None" % sample_proportion)

    assert aug_with in ["Normal", "Uniform", "GaussMix"], "Unknown aug_with: " + aug_with
    print("aug_with:", aug_with)

    ini_fin_name = model_ini_name + "_" + model_fin_name
    assert ini_fin_name in ["2c_rm", "2c_em", "rm_em"], "Unknown ini_fin_name: " + ini_fin_name

    # "logp" is bookkeeping stored alongside the samples, not a model variable
    vars_ini = [var for var in sample_ini if var != "logp"]
    print("vars_ini:", vars_ini)
    nsamples_ini = len(sample_ini[vars_ini[0]])
    print("nsamples_ini = %d" % nsamples_ini)

    vars_fin = [var for var in sample_fin if var != "logp"]
    print("vars_fin:", vars_fin)
    nsamples_fin = len(sample_fin[vars_fin[0]])
    print("nsamples_fin = %d" % nsamples_fin)

    # get var names
    if ini_fin_name in ["2c_rm", "2c_em"]:
        dg1_var_f = var_starts_with("DeltaG1", vars_fin)
        ddg_var_f = var_starts_with("DeltaDeltaG", vars_fin)
        dh1_var_f = var_starts_with("DeltaH1", vars_fin)
        dh2_var_f = var_starts_with("DeltaH2", vars_fin)

        dg_var_i = var_starts_with("DeltaG", vars_ini)
        dh_var_i = var_starts_with("DeltaH", vars_ini)

        if ini_fin_name == "2c_em":
            r_var_f = var_starts_with("rho", vars_fin)

    elif ini_fin_name == "rm_em":
        r_var_f = var_starts_with("rho", vars_fin)

    # get redundant parameters from final state
    sample_redun_fin = {}
    if ini_fin_name in ["2c_rm", "2c_em"]:
        sample_redun_fin["DeltaDeltaG"] = sample_fin[ddg_var_f]
        sample_redun_fin["DeltaDeltaH"] = sample_fin[dh2_var_f] - sample_fin[dh1_var_f]

        if ini_fin_name == "2c_em":
            sample_redun_fin["rho"] = sample_fin[r_var_f]

    elif ini_fin_name == "rm_em":
        sample_redun_fin["rho"] = sample_fin[r_var_f]

    print("Vars of sample_redun_fin:", list(sample_redun_fin.keys()))

    # fit models to sample_redun_fin
    if aug_with == "Normal":
        mu_sigma_fin = fit_normal_trace(sample_redun_fin, sigma_robust=sigma_robust)
        sample_aug_ini = draw_normal_samples(mu_sigma_fin, nsamples_ini)

    elif aug_with == "Uniform":
        lower_upper_fin = fit_uniform_trace(sample_redun_fin)
        sample_aug_ini = draw_uniform_samples(lower_upper_fin, nsamples_ini)

    elif aug_with == "GaussMix":
        print("n_components:", n_components)
        print("covariance_type:", covariance_type)
        gauss_mix = GaussMix(n_components=n_components, covariance_type=covariance_type)
        gauss_mix.fit(sample_redun_fin)
        sample_aug_ini = gauss_mix.sample(n_samples=nsamples_ini)

    print("Vars of sample_aug_ini:", list(sample_aug_ini.keys()))

    # variables shared one-to-one by the initial and final models
    ini_fin_var_match = [("P0", "P0"), ("Ls", "Ls"), ("DeltaH_0", "DeltaH_0"), ("log_sigma", "log_sigma")]
    # additional one-to-one matches that only apply to the rm -> em transition
    ini_fin_var_match_extra = [("DeltaG1", "DeltaG1"), ("DeltaDeltaG", "DeltaDeltaG"),
                               ("DeltaH1", "DeltaH1"), ("DeltaH2", "DeltaH2")]

    # potential for sample drawn from i estimated at state i
    if aug_with == "Normal":
        u_i_i = pot_ener_normal_aug(sample_ini, model_ini, sample_aug_ini, mu_sigma_fin)

    elif aug_with == "Uniform":
        u_i_i = pot_ener_uniform_aug(sample_ini, model_ini, sample_aug_ini, lower_upper_fin)

    elif aug_with == "GaussMix":
        u_i_i = pot_ener_gauss_mix_aug(sample_ini, model_ini, sample_aug_ini, gauss_mix)

    # potential for sample drawn from i estimated at state f
    sample_tmp_ini = {}
    for ki, kf in ini_fin_var_match:
        var_ini = var_starts_with(ki, vars_ini)
        var_fin = var_starts_with(kf, vars_fin)
        sample_tmp_ini[var_fin] = sample_ini[var_ini]

    if ini_fin_name in ["2c_rm", "2c_em"]:
        # map (DeltaG, DeltaH) of the initial model plus the augmented differences
        # onto the per-site parameters of the final model
        sample_tmp_ini[dg1_var_f] = sample_ini[dg_var_i] - 0.5 * sample_aug_ini["DeltaDeltaG"]
        sample_tmp_ini[ddg_var_f] = sample_aug_ini["DeltaDeltaG"]
        sample_tmp_ini[dh1_var_f] = sample_ini[dh_var_i] - 0.5 * sample_aug_ini["DeltaDeltaH"]
        sample_tmp_ini[dh2_var_f] = sample_ini[dh_var_i] + 0.5 * sample_aug_ini["DeltaDeltaH"]

        if ini_fin_name == "2c_em":
            sample_tmp_ini[r_var_f] = sample_aug_ini["rho"]

    elif ini_fin_name == "rm_em":
        for ki, kf in ini_fin_var_match_extra:
            var_ini = var_starts_with(ki, vars_ini)
            var_fin = var_starts_with(kf, vars_fin)
            sample_tmp_ini[var_fin] = sample_ini[var_ini]

        sample_tmp_ini[r_var_f] = sample_aug_ini["rho"]

    u_i_f = pot_ener(sample_tmp_ini, model_fin)
    del sample_tmp_ini

    # potential for sample drawn from f estimated at state i
    sample_tmp_fin = {}
    for ki, kf in ini_fin_var_match:
        var_ini = var_starts_with(ki, vars_ini)
        var_fin = var_starts_with(kf, vars_fin)
        sample_tmp_fin[var_ini] = sample_fin[var_fin]

    if ini_fin_name in ["2c_rm", "2c_em"]:
        # inverse of the mapping used above
        sample_tmp_fin[dg_var_i] = sample_fin[dg1_var_f] + 0.5 * sample_fin[ddg_var_f]
        sample_tmp_fin[dh_var_i] = 0.5 * (sample_fin[dh1_var_f] + sample_fin[dh2_var_f])

    elif ini_fin_name == "rm_em":
        for ki, kf in ini_fin_var_match_extra:
            var_ini = var_starts_with(ki, vars_ini)
            var_fin = var_starts_with(kf, vars_fin)
            sample_tmp_fin[var_ini] = sample_fin[var_fin]

    if aug_with == "Normal":
        u_f_i = pot_ener_normal_aug(sample_tmp_fin, model_ini, sample_redun_fin, mu_sigma_fin)

    elif aug_with == "Uniform":
        u_f_i = pot_ener_uniform_aug(sample_tmp_fin, model_ini, sample_redun_fin, lower_upper_fin)

    elif aug_with == "GaussMix":
        u_f_i = pot_ener_gauss_mix_aug(sample_tmp_fin, model_ini, sample_redun_fin, gauss_mix)

    del sample_tmp_fin

    # potential for sample drawn from f estimated at state f
    u_f_f = pot_ener(sample_fin, model_fin)

    # forward (i -> f) and reverse (f -> i) work values
    w_F = u_i_f - u_i_i
    w_R = u_f_i - u_f_f

    w_F = filter_nan_inf(w_F)
    w_R = filter_nan_inf(w_R)

    if (len(w_F) == 0) or (len(w_R) == 0):
        print("Empty work arrays:", w_F.shape, w_R.shape)
        if bootstrap is None:
            return 0.
        else:
            return 0., 0.

    if sample_proportion is None:
        delta_F = pymbar.BAR(w_F, w_R, compute_uncertainty=False, relative_tolerance=1e-12, verbose=True)
        bf = -delta_F

        if bootstrap is None:
            print("log10(bf) = %0.5f" % (bf * np.log10(np.e)))
            return bf
        else:
            print("Running %d bootstraps to estimate error." % bootstrap)
            _, bf_err = bootstrap_BAR(w_F, w_R, bootstrap, sample_proportion=1.)
            print("log10(bf) = %0.5f +/- %0.5f" % (bf * np.log10(np.e), bf_err * np.log10(np.e)))
            return bf, bf_err

    else:
        # bootstrap_BAR returns the mean of BAR's Delta F, so it has to be negated
        # to give the Bayes factor with the same sign convention as the branch above
        # (the standard deviation is unaffected by the sign flip).
        delta_F_mean, bf_err = bootstrap_BAR(w_F, w_R, bootstrap, sample_proportion=sample_proportion)
        bf = -delta_F_mean
        return bf, bf_err
def mybar_impl(w):
    """
    Return only the first value that ``pymbar.BAR`` yields for a pair of work arrays.

    :param w: indexable holding the two arrays passed to ``pymbar.BAR``
              (``w[0]`` as first argument, ``w[1]`` as second)
    :return: the first element of the pair returned by ``pymbar.BAR``
             (the free energy estimate); the second element is discarded
    """
    forward_work, reverse_work = w[0], w[1]
    free_energy, _uncertainty = pymbar.BAR(forward_work, reverse_work)
    return free_energy
# Every state contributes the same number of samples (npoints).
N_k = npoints*np.ones([nstates],int)
mbar = pymbar.MBAR(u_kln,N_k,relative_tolerance=1.0e-10,verbose=True)
(Delta_f_ij_estimated, dDelta_f_ij_estimated) = mbar.getFreeEnergyDifferences()
# Single-argument, parenthesised print works identically on Python 2 and 3.
print(Delta_f_ij_estimated)
print(dDelta_f_ij_estimated)

# With exactly two states, cross-check the MBAR result against
# exponential averaging (EXP) in both directions and against BAR.
if len(dirnames) == 2:
    wf = -(u_kln[0,1:,]-u_kln[0,0,:])
    (df_forward,ddf_forward) = pymbar.EXP(wf)
    print('EXP forward %10.4f +/- %7.4f' % (df_forward, ddf_forward))
    wr = -(u_kln[1,1:,]-u_kln[1,0,:])
    (df_rev, ddf_rev) = pymbar.EXP(wr)
    print('EXP reverse %10.4f +/- %7.4f' % (df_rev, ddf_rev))
    # BAR is run on (-wf, wr) and its estimate is negated for printing,
    # so it is reported in the same direction as the "reverse" EXP value.
    (df_bar, ddf_bar) = pymbar.BAR(-wf,wr)
    print('BAR reverse %10.4f +/- %7.4f' % (-df_bar, ddf_bar))

    # Plot the overlap of the two energy-difference distributions.
    # (wf and wr only exist in the two-state case, hence the nesting.)
    if plot:
        plt.clf()
        plt.hist(wf.T, facecolor='red')
        plt.hist(wr.T, facecolor='blue')
        plt.show()