def __init__(
    self,
    ubps,
    lamb,
    masses,
    temperature,
    pressure,
    n_steps,
    seed,
    dt=1.5e-3,
    friction=1.0,
    barostat_interval=5,
):
    """Create and store the integrator, potential and barostat implementations.

    Parameters
    ----------
    ubps
        Potentials; ``bound_impl(np.float32)`` is called on each one and the
        first is passed to ``get_bond_list`` to derive the barostat groups.
    lamb
        Stored unchanged as ``self.lamb``.
    masses
        Passed to the Langevin integrator; ``len(masses)`` is the atom count
        given to the barostat.
    temperature, pressure
        Forwarded to the integrator and barostat constructors.
    n_steps
        Stored unchanged as ``self.n_steps``.
    seed
        Integrator seed; the barostat is seeded with ``seed + 1``.
    dt, friction
        Forwarded to ``lib.LangevinIntegrator``.
    barostat_interval
        Forwarded to ``lib.MonteCarloBarostat``.
    """
    # Only the integrator's implementation object is retained.
    self.integrator_impl = lib.LangevinIntegrator(temperature, dt, friction, masses, seed).impl()

    potential_impls = [potential.bound_impl(np.float32) for potential in ubps]

    # The barostat's atom groups are derived from the first potential's bond list.
    molecule_groups = get_group_indices(get_bond_list(ubps[0]))
    mc_barostat = lib.MonteCarloBarostat(
        len(masses),
        pressure,
        temperature,
        molecule_groups,
        barostat_interval,
        seed + 1,
    )

    self.bound_impls = potential_impls
    self.barostat_impl = mc_barostat.impl(potential_impls)
    self.lamb = lamb
    self.n_steps = n_steps
def vacuum_model(ff_params):
    """Return the first element of ``estimator.deltaG`` for the vacuum edge.

    The potentials, parameters, masses and coordinates come from
    ``rfe.prepare_vacuum_edge(ff_params)``. The names ``rfe``, ``seed``,
    ``lambda_schedule``, ``equil_steps`` and ``prod_steps`` are not defined
    in this function and are read from the enclosing scope.

    Parameters
    ----------
    ff_params
        Forcefield parameters forwarded to ``rfe.prepare_vacuum_edge``.

    Returns
    -------
    The first item of the value returned by ``estimator.deltaG``.
    """
    unbound_potentials, sys_params, masses, coords = rfe.prepare_vacuum_edge(ff_params)

    # Initial state: prepared coordinates, zero velocities. This is set up
    # exactly once so that only a single pool client is ever constructed.
    x0 = coords
    v0 = np.zeros_like(coords)
    client = CUDAPoolClient(1)

    # Diagonal box matrix with 100 on each axis.
    box = np.eye(3, dtype=np.float64) * 100

    # Barostat groups are derived from the first potential's bond list.
    harmonic_bond_potential = unbound_potentials[0]
    group_idxs = get_group_indices(get_bond_list(harmonic_bond_potential))

    temperature = 300.0
    pressure = 1.0

    integrator = LangevinIntegrator(temperature, 1.5e-3, 1.0, masses, seed)
    barostat = MonteCarloBarostat(x0.shape[0], pressure, temperature, group_idxs, 25, seed)

    model = estimator.FreeEnergyModel(
        unbound_potentials,
        client,
        box,
        x0,
        v0,
        integrator,
        lambda_schedule,
        equil_steps,
        prod_steps,
        barostat,
    )

    return estimator.deltaG(model, sys_params)[0]
def test_free_energy_estimator():
    """Run the ABFE estimator on a toy bond + angle system.

    The estimator is evaluated once with no client and once with a
    ``CUDAPoolClient``; the test only checks that both runs complete.
    """
    n_atoms, n_bonds, n_angles = 5, 3, 4

    x0 = np.random.rand(n_atoms, 3)
    v0 = np.zeros_like(x0)

    hb_pot, hb_params = get_harmonic_bond(n_atoms, n_bonds)
    ha_pot, ha_params = get_harmonic_angle(n_atoms, n_angles)
    unbound_potentials = [hb_pot, ha_pot]
    sys_params = [hb_params, ha_params]

    masses = np.random.rand(n_atoms)
    box = np.eye(3, dtype=np.float64)

    seed = 2021
    temperature = 300.0
    pressure = 1.0
    group_idxs = get_group_indices(get_bond_list(hb_pot))

    integrator = LangevinIntegrator(temperature, 1.5e-3, 1.0, masses, seed)
    barostat = MonteCarloBarostat(x0.shape[0], pressure, temperature, group_idxs, 25, seed)

    beta = 0.125
    lambda_schedule = np.linspace(0, 1.0, 4)

    def loss_fn(sys_params):
        # ``client`` is looked up at call time, i.e. it is whichever value the
        # loop below is currently on.
        endpoint_correct = False
        mdl = estimator_abfe.FreeEnergyModel(
            unbound_potentials,
            endpoint_correct,
            client,
            box,
            x0,
            v0,
            integrator,
            barostat,
            lambda_schedule,
            100,
            100,
            beta,
            "test",
        )
        estimate = estimator_abfe.deltaG(mdl, sys_params)
        dG = estimate[0]
        return dG**2

    clients = [None, CUDAPoolClient(1)]
    for client in clients:
        loss_fn(sys_params)
def test_barostat_zero_interval():
    """Check that a barostat interval of 0 is rejected with ``RuntimeError``.

    Both construction with interval 0 and ``set_interval(0)`` on a barostat
    built with interval 1 are expected to raise.
    """
    pressure = 1.0 * unit.atmosphere
    temperature = 300.0 * unit.kelvin
    initial_waterbox_width = 2.5 * unit.nanometer
    seed = 2021
    np.random.seed(seed)

    mol_a = hif2a_ligand_pair.mol_a
    ff = hif2a_ligand_pair.ff
    complex_system, complex_coords, complex_box, complex_top = build_water_system(
        initial_waterbox_width.value_in_unit(unit.nanometer)
    )

    afe = AbsoluteFreeEnergy(mol_a, ff)
    unbound_potentials, sys_params, masses, coords = afe.prepare_host_edge(
        ff.get_ordered_params(), complex_system, complex_coords
    )

    # The barostat's molecule list is read off the bond table.
    group_indices = get_group_indices(get_bond_list(unbound_potentials[0]))

    # Bind every potential first, then build the float32 implementations.
    bound_potentials = [
        unbound_pot.bind(np.asarray(params))
        for params, unbound_pot in zip(sys_params, unbound_potentials)
    ]
    u_impls = [bp.bound_impl(precision=np.float32) for bp in bound_potentials]

    n_atoms = coords.shape[0]
    pressure_bar = pressure.value_in_unit(unit.bar)
    temperature_k = temperature.value_in_unit(unit.kelvin)

    # An interval of 0 must be refused at construction time.
    with pytest.raises(RuntimeError):
        custom_ops.MonteCarloBarostat(
            n_atoms, pressure_bar, temperature_k, group_indices, 0, u_impls, seed
        )

    # An interval of 1 is the smallest valid setting.
    baro = custom_ops.MonteCarloBarostat(
        n_atoms, pressure_bar, temperature_k, group_indices, 1, u_impls, seed
    )

    # Resetting an existing barostat to 0 must be refused as well.
    with pytest.raises(RuntimeError):
        baro.set_interval(0)
def binding_model(ff_params):
    """Return the complex-leg dG minus the solvent-leg dG.

    Each leg minimizes its host, prepares the host edge, and runs
    ``estimator.deltaG`` on a freshly built ``FreeEnergyModel``. Host
    systems, ``mol_a``, ``ff``, ``rfe``, ``seed``, ``lambda_schedule``,
    ``equil_steps`` and ``prod_steps`` are read from the enclosing scope.

    Parameters
    ----------
    ff_params
        Forcefield parameters forwarded to ``rfe.prepare_host_edge``.
    """
    temperature = 300.0
    pressure = 1.0

    legs = (
        (complex_system, complex_coords, complex_box),
        (solvent_system, solvent_coords, solvent_box),
    )

    leg_dGs = []
    for host_system, host_coords, host_box in legs:
        # Minimize the host first so the simulation does not start from clashes.
        min_coords = minimizer.minimize_host_4d([mol_a], host_system, host_coords, ff, host_box)

        unbound_potentials, sys_params, masses, coords = rfe.prepare_host_edge(
            ff_params, host_system, min_coords
        )

        x0 = coords
        v0 = np.zeros_like(coords)
        client = CUDAPoolClient(1)

        group_idxs = get_group_indices(get_bond_list(unbound_potentials[0]))

        integrator = LangevinIntegrator(temperature, 1.5e-3, 1.0, masses, seed)
        barostat = MonteCarloBarostat(x0.shape[0], pressure, temperature, group_idxs, 25, seed)

        model = estimator.FreeEnergyModel(
            unbound_potentials,
            client,
            host_box,
            x0,
            v0,
            integrator,
            lambda_schedule,
            equil_steps,
            prod_steps,
            barostat,
        )

        dG, _ = estimator.deltaG(model, sys_params)
        leg_dGs.append(dG)

    complex_dG, solvent_dG = leg_dGs
    return complex_dG - solvent_dG
def equilibrate_solvent_phase(
    ubps,
    params,
    masses,
    coords,  # minimized_coords
    box,
    temperature,
    pressure,
    num_steps,
    seed=None,
):
    """
    Generate samples in the solvent phase.

    Binds ``params`` to ``ubps``, runs ``num_steps`` steps at lambda = 0 with
    a Langevin integrator and a Monte Carlo barostat, and returns the final
    state with zeroed velocities.

    Parameters
    ----------
    ubps
        Unbound potentials; the first one is passed to ``get_bond_list``.
    params
        One parameter set per potential, bound pairwise with ``ubps``.
    masses
        Passed to the integrator; ``len(masses)`` is the atom count given to
        the barostat.
    coords
        Starting coordinates (the signature comment calls them minimized).
    box
        Starting box.
    temperature, pressure
        Forwarded to the integrator and barostat constructors.
    num_steps
        Number of steps to run.
    seed
        Integrator seed; the barostat uses ``seed + 1``. When ``None`` a
        random seed is drawn with ``np.random.randint``.

    Returns
    -------
    CoordsVelBox
        Final coordinates, zero velocities, and the final box.
    """
    if seed is None:
        # ``seed + 1`` below needs an integer, so the ``None`` default is
        # replaced by a random seed.
        seed = np.random.randint(np.iinfo(np.int32).max)

    dt = 1e-4
    friction = 1.0
    barostat_interval = 5

    bps = [bp.bind(p) for p, bp in zip(params, ubps)]
    all_impls = [bp.bound_impl(np.float32) for bp in bps]

    intg_equil = lib.LangevinIntegrator(temperature, dt, friction, masses, seed)
    intg_equil_impl = intg_equil.impl()

    bond_list = get_bond_list(ubps[0])
    group_idxs = get_group_indices(bond_list)

    barostat = lib.MonteCarloBarostat(
        len(masses), pressure, temperature, group_idxs, barostat_interval, seed + 1
    )
    barostat_impl = barostat.impl(all_impls)

    # NOTE(review): an earlier comment here said equilibration/minimization
    # doesn't need a barostat, yet one is passed to the Context. Confirm
    # which is intended.
    equil_ctxt = custom_ops.Context(
        coords, np.zeros_like(coords), box, intg_equil_impl, all_impls, barostat_impl
    )

    lamb = 0.0

    # TODO: revert to 50k
    equil_schedule = np.ones(num_steps) * lamb
    equil_ctxt.multiple_steps(equil_schedule)

    x0 = equil_ctxt.get_x_t()

    # (ytz): This has to be zeros_like for now since if we freeze ligand
    # coordinates it would start to move during rejected moves.
    v0 = np.zeros_like(x0)

    return CoordsVelBox(x0, v0, equil_ctxt.get_box())
temperature.value_in_unit(unit.kelvin), timestep.value_in_unit(unit.picosecond), collision_rate.value_in_unit(unit.picosecond**-1), masses, seed, ) integrator_impl = integrator.impl() def reduced_potential_fxn(x, box, lam): u, du_dx = ensemble.reduced_potential_and_gradient(x, box, lam) return u # get list of molecules for barostat by looking at bond table harmonic_bond_potential = unbound_potentials[0] bond_list = get_bond_list(harmonic_bond_potential) group_indices = get_group_indices(bond_list) trajs = [] volume_trajs = [] # run at lambda=1.0, n_replicates times lambdas = np.ones(n_replicates) for lam in lambdas: thermostat = UnadjustedLangevinMove(integrator_impl, potential_energy_model.all_impls, lam, n_steps=barostat_interval) barostat = MonteCarloBarostat(partial(reduced_potential_fxn, lam=lam), group_indices, max_delta_volume=3.0)
def simulate_futures(
    self, ff_params, mol, x0, box0, prefix, core_idxs=None, seed=0
) -> Tuple[List[Any], estimator_abfe.FreeEnergyModel, List[Any]]:
    """Queue one ``estimator_abfe.simulate`` job per lambda window.

    Parameters
    ----------
    ff_params
        Forcefield parameters forwarded to ``prepare_host_edge``.
    mol
        Molecule used to build the topology.
    x0, box0
        Starting coordinates and box; velocities start at zero.
    prefix
        Passed through to ``FreeEnergyModel``.
    core_idxs
        Not used in this method.
    seed
        Seed for the integrator and barostat; ``0`` means draw a random one.

    Returns
    -------
    tuple
        ``(sys_params, model, futures)``. When ``self.client`` is ``None``
        the jobs are run inline and wrapped in ``_MockFuture``.
    """
    top = self.setup_topology(mol)
    afe = free_energy_rabfe.AbsoluteFreeEnergy(mol, top)
    unbound_potentials, sys_params, masses = afe.prepare_host_edge(ff_params, self.host_system)

    if seed == 0:
        seed = np.random.randint(np.iinfo(np.int32).max)

    beta = 1 / (constants.BOLTZ * self.temperature)

    bond_list = get_bond_list(unbound_potentials[0])
    masses = model_utils.apply_hmr(masses, bond_list)

    friction = 1.0
    integrator = LangevinIntegrator(self.temperature, self.dt, friction, masses, seed)

    group_indices = get_group_indices(bond_list)
    barostat_interval = 5
    barostat = MonteCarloBarostat(
        x0.shape[0], self.pressure, self.temperature, group_indices, barostat_interval, seed
    )

    v0 = np.zeros_like(x0)

    endpoint_correct = False
    model = estimator_abfe.FreeEnergyModel(
        unbound_potentials,
        endpoint_correct,
        self.client,
        box0,
        x0,
        v0,
        integrator,
        barostat,
        self.host_schedule,
        self.equil_steps,
        self.prod_steps,
        beta,
        prefix,
    )

    bound_potentials = [
        unbound_pot.bind(np.asarray(params))
        for params, unbound_pot in zip(sys_params, model.unbound_potentials)
    ]

    subsample_interval = 1000

    def job_args(lamb, job_potentials, eval_schedule):
        # Positional argument tuple in the order ``estimator_abfe.simulate`` is called with.
        return (
            lamb,
            model.box,
            model.x0,
            model.v0,
            job_potentials,
            model.integrator,
            model.barostat,
            model.equil_steps,
            model.prod_steps,
            subsample_interval,
            subsample_interval,
            eval_schedule,
        )

    all_args = [
        job_args(lamb, bound_potentials, model.lambda_schedule)
        for lamb in model.lambda_schedule
    ]

    if endpoint_correct:
        assert isinstance(bound_potentials[-1], potentials.HarmonicBond)
        # Extra lambda = 1 window with the restraints stripped out; no Us need
        # to be evaluated for the endpoint correction.
        all_args.append(job_args(1.0, bound_potentials[:-1], []))

    if self.client is None:
        futures = [_MockFuture(estimator_abfe.simulate(*args)) for args in all_args]
    else:
        futures = [self.client.submit(estimator_abfe.simulate, *args) for args in all_args]

    return sys_params, model, futures
def equilibrate_host(
    mol: Chem.Mol,
    host_system: openmm.System,
    host_coords: NDArray,
    temperature: float,
    pressure: float,
    ff: Forcefield,
    box: NDArray,
    n_steps: int,
    seed: Optional[int] = None,
) -> Tuple[NDArray, NDArray]:
    """
    Equilibrate a host system around a reference molecule with the MonteCarloBarostat.

    Useful for preparing a host that will be reused across several FEP
    calculations sharing the same reference, i.e. a starmap.

    Steps performed:
      - Minimize the host with the mol held rigid
      - Minimize the host and mol together
      - Run n_steps with HMR enabled and a MonteCarloBarostat move every 5 steps

    Parameters
    ----------
    mol: Chem.Mol
        Ligand for the host to equilibrate with.

    host_system: openmm.System
        OpenMM System representing the host.

    host_coords: np.ndarray
        N x 3 coordinates of the host. units of nanometers.

    temperature: float
        Temperature at which to run the simulation. Units of kelvins.

    pressure: float
        Pressure at which to run the simulation. Units of bars.

    ff: ff.Forcefield
        Wrapper class around a list of handlers.

    box: np.ndarray [3,3]
        Box matrix for periodic boundary conditions. units of nanometers.

    n_steps: int
        Number of steps to run the simulation for.

    seed: int or None
        Value to seed simulation with; a random seed is drawn when None.

    Returns
    -------
    tuple (coords, box)
        Equilibrated system coords and the final box.
    """
    # Insert the mol into the binding pocket.
    host_bps, host_masses = openmm_deserializer.deserialize_system(host_system, cutoff=1.2)
    min_host_coords = minimize_host_4d([mol], host_system, host_coords, ff, box)

    ligand_masses = [atom.GetMass() for atom in mol.GetAtoms()]
    ligand_coords = get_romol_conf(mol)

    combined_masses = np.concatenate([host_masses, ligand_masses])
    combined_coords = np.concatenate([min_host_coords, ligand_coords])

    top = topology.BaseTopology(mol, ff)
    hgt = topology.HostGuestTopology(host_bps, top)

    # Each entry pairs a parameterization function with the forcefield
    # handlers whose params it consumes.
    parameterizers = [
        (hgt.parameterize_harmonic_bond, (ff.hb_handle,)),
        (hgt.parameterize_harmonic_angle, (ff.ha_handle,)),
        (hgt.parameterize_periodic_torsion, (ff.pt_handle, ff.it_handle)),
        (hgt.parameterize_nonbonded, (ff.q_handle, ff.lj_handle)),
    ]

    bound_potentials = []
    u_impls = []
    for parameterize, handles in parameterizers:
        handle_params = [handle.params for handle in handles]
        params, potential = parameterize(*handle_params)
        bound = potential.bind(params)
        bound_potentials.append(bound)
        u_impls.append(bound.bound_impl(precision=np.float32))

    bond_list = get_bond_list(bound_potentials[0])
    combined_masses = model_utils.apply_hmr(combined_masses, bond_list)

    dt = 2.5e-3
    friction = 1.0

    if seed is None:
        seed = np.random.randint(np.iinfo(np.int32).max)

    integrator = LangevinIntegrator(temperature, dt, friction, combined_masses, seed).impl()

    x0 = combined_coords
    v0 = np.zeros_like(x0)

    group_indices = get_group_indices(bond_list)
    barostat_interval = 5
    barostat = MonteCarloBarostat(
        x0.shape[0], pressure, temperature, group_indices, barostat_interval, seed
    ).impl(u_impls)

    # Second minimization pass, this time with the mol flexible.
    x0 = fire_minimize(x0, u_impls, box, np.ones(50))

    # Context arguments: positions, velocities, box, integrator, energy fxns, barostat.
    ctxt = custom_ops.Context(x0, v0, box, integrator, u_impls, barostat)

    # All-zero lambda schedule of length n_steps.
    ctxt.multiple_steps(np.linspace(0.0, 0.0, n_steps))

    return ctxt.get_x_t(), ctxt.get_box()
def equilibrate_edges(
    self,
    edges: List[Tuple[Chem.Mol, Chem.Mol, np.ndarray]],
    lamb: float = 0.0,
    barostat_interval: int = 10,
    equilibration_steps: int = 100000,
    cache_path: str = "equilibration_cache.pkl",
):
    """
    Pre-equilibrate edges and cache them for later use in predictions.

    One ``estimator.equilibrate`` job per (stage, edge) pair is submitted
    through ``self.client``. Does nothing when ``self.pre_equilibrate`` is
    falsy. If ``cache_path`` names an existing file, the cache is loaded
    from it and nothing is simulated.

    Parameters
    ----------
    edges: List of tuples with mol_a, mol_b, core
        Edges to equilibrate

    lamb: float
        Lambda value to equilibrate at. Uses Dual Topology to equilibrate

    barostat_interval: int
        Interval on which to run barostat during equilibration

    equilibration_steps: int
        Number of steps to equilibrate the edge for

    cache_path: string
        Path to look for existing cache or path to where to save cache. By
        default it will write out a pickle file in the local directory. A
        falsy value (``None`` or ``""``) disables both loading and saving.
    """
    if not self.pre_equilibrate:
        return

    # Guarded the same way as the save at the end: ``os.path.isfile(None)``
    # raises TypeError, so a falsy path must skip the lookup.
    if cache_path and os.path.isfile(cache_path):
        # NOTE: ``load`` presumably unpickles the file; only point cache_path
        # at files this code wrote, since unpickling untrusted data is unsafe.
        with open(cache_path, "rb") as ifs:
            self._equil_cache = load(ifs)
        print("Loaded Pre-equilibrated structures from cache")
        return

    futures = []
    ordered_params = self.ff.get_ordered_params()

    temperature = 300.0
    pressure = 1.0

    for stage, host_system, host_coords, host_box in [
        ("complex", self.complex_system, self.complex_coords, self.complex_box),
        ("solvent", self.solvent_system, self.solvent_coords, self.solvent_box),
    ]:
        # Submit all complex legs before the solvent legs, as they will
        # likely take longer than the solvent legs.
        for mol_a, mol_b, core in edges:
            # Use DualTopology to ensure mols exist in the same space.
            topo = topology.DualTopologyMinimization(mol_a, mol_b, self.ff)
            rfe = free_energy.RelativeFreeEnergy(topo)
            min_coords = minimizer.minimize_host_4d(
                [mol_a, mol_b], host_system, host_coords, self.ff, host_box
            )

            unbound_potentials, sys_params, masses, coords = rfe.prepare_host_edge(
                ordered_params, host_system, min_coords
            )

            harmonic_bond_potential = unbound_potentials[0]
            bond_list = get_bond_list(harmonic_bond_potential)
            group_idxs = get_group_indices(bond_list)

            # HMR swaps in repartitioned masses together with a larger time step.
            time_step = 1.5e-3
            if self.hmr:
                masses = apply_hmr(masses, bond_list)
                time_step = 2.5e-3

            integrator = LangevinIntegrator(temperature, time_step, 1.0, masses, 0)
            barostat = MonteCarloBarostat(
                coords.shape[0], pressure, temperature, group_idxs, barostat_interval, 0
            )

            pots = [
                bp.bind(np.asarray(params))
                for bp, params in zip(unbound_potentials, sys_params)
            ]

            future = self.client.submit(
                estimator.equilibrate,
                integrator,
                barostat,
                pots,
                coords,
                host_box,
                lamb,
                equilibration_steps,
            )
            futures.append((stage, (mol_a, mol_b, core), future))

    num_equil = len(futures)
    for i, (stage, edge, future) in enumerate(futures):
        edge_hash = self._edge_hash(stage, *edge)
        self._equil_cache[edge_hash] = future.result()
        if (i + 1) % 5 == 0:
            print(f"Pre-equilibrated {i+1} of {num_equil} edges")
    print(f"Pre-equilibrated {num_equil} edges")

    if cache_path:
        with open(cache_path, "wb") as ofs:
            dump(self._equil_cache, ofs)
        print(f"Saved equilibration_cache to {cache_path}")
def predict(self, ff_params: list, mol_a: Chem.Mol, mol_b: Chem.Mol, core: np.ndarray):
    """
    Predict the ddG of morphing mol_a into mol_b. This function is differentiable w.r.t. ff_params.

    The complex stage and the solvent stage are each run through
    ``estimator.deltaG``; the prediction is the complex dG minus the
    solvent dG.

    Parameters
    ----------

    ff_params: list of np.ndarray
        This should match the ordered params returned by the forcefield

    mol_a: Chem.Mol
        Starting molecule corresponding to lambda = 0

    mol_b: Chem.Mol
        Starting molecule corresponding to lambda = 1

    core: np.ndarray
        N x 2 list of ints corresponding to the atom mapping of the core.

    Returns
    -------
    float
        delta delta G in kJ/mol
    aux
        list of (stage, results) tuples, one per stage, where results is the
        second value returned by ``estimator.deltaG``

    """
    stage_dGs = []
    stage_results = []

    for stage, host_system, host_coords, host_box, lambda_schedule in [
        ("complex", self.complex_system, self.complex_coords, self.complex_box, self.complex_schedule),
        ("solvent", self.solvent_system, self.solvent_coords, self.solvent_box, self.solvent_schedule),
    ]:
        single_topology = topology.SingleTopology(mol_a, mol_b, core, self.ff)
        rfe = free_energy.RelativeFreeEnergy(single_topology)
        edge_hash = self._edge_hash(stage, mol_a, mol_b, core)
        if self.pre_equilibrate and edge_hash in self._equil_cache:
            # Start from the cached pre-equilibrated coordinates and box
            # instead of minimizing the host again.
            cached_state = self._equil_cache[edge_hash]
            x0 = cached_state.coords
            host_box = cached_state.box
            num_host_coords = len(host_coords)
            # Only potentials, params and masses are taken from the prepared
            # edge; its coordinates are discarded in favour of the cache.
            unbound_potentials, sys_params, masses, _ = rfe.prepare_host_edge(
                ff_params, host_system, host_coords)
            mol_a_size = mol_a.GetNumAtoms()
            # Use Dual Topology to pre equilibrate, so have to get the mean of the two sets of mol,
            # normally done within prepare_host_edge, but the whole system has moved by this stage
            x0 = np.concatenate([
                x0[:num_host_coords],
                np.mean(
                    single_topology.interpolate_params(
                        x0[num_host_coords:num_host_coords + mol_a_size],
                        x0[num_host_coords + mol_a_size:]),
                    axis=0,
                ),
            ])
        else:
            # Pre-equilibration was requested but this edge has no cache entry.
            if self.pre_equilibrate:
                print(
                    "Edge not correctly pre-equilibrated, ensure equilibrate_edges was called"
                )
            print(
                f"Minimizing the {stage} host structure to remove clashes."
            )
            # (ytz): this isn't strictly symmetric, and we should modify minimize later on remove
            # the hysteresis by jointly minimizing against a and b at the same time. We may also want
            # to remove the randomness completely from the minimization.
            min_host_coords = minimizer.minimize_host_4d([mol_a, mol_b], host_system, host_coords, self.ff, host_box)

            unbound_potentials, sys_params, masses, coords = rfe.prepare_host_edge(
                ff_params, host_system, min_host_coords)

            x0 = coords

        # Both branches start the simulation from zero velocities.
        v0 = np.zeros_like(x0)

        time_step = 1.5e-3

        harmonic_bond_potential = unbound_potentials[0]
        bond_list = get_bond_list(harmonic_bond_potential)
        # With HMR enabled the masses are replaced and a larger time step is used.
        if self.hmr:
            masses = apply_hmr(masses, bond_list)
            time_step = 2.5e-3
        group_idxs = get_group_indices(bond_list)

        # The seed is fixed, so the integrator and barostat are seeded
        # identically on every call.
        seed = 0

        temperature = 300.0
        pressure = 1.0

        integrator = LangevinIntegrator(temperature, time_step, 1.0, masses, seed)

        barostat = MonteCarloBarostat(x0.shape[0], pressure, temperature, group_idxs, self.barostat_interval, seed)

        model = estimator.FreeEnergyModel(
            unbound_potentials,
            self.client,
            host_box,
            x0,
            v0,
            integrator,
            lambda_schedule,
            self.equil_steps,
            self.prod_steps,
            barostat,
        )

        dG, results = estimator.deltaG(model, sys_params)

        stage_dGs.append(dG)
        stage_results.append((stage, results))

    # Stages were appended in order: index 0 is complex, index 1 is solvent.
    pred = stage_dGs[0] - stage_dGs[1]

    return pred, stage_results
def benchmark(
    label,
    masses,
    lamb,
    x0,
    v0,
    box,
    bound_potentials,
    hmr=False,
    verbose=True,
    num_batches=100,
    steps_per_batch=1000,
    compute_du_dl_interval=0,
    barostat_interval=0,
):
    """
    Time batches of ``Context.multiple_steps`` and print the throughput.

    TODO: configuration blob containing num_batches, steps_per_batch, and any other options

    Parameters
    ----------
    label
        Prefix of the final summary line.
    masses
        Particle masses; replaced by ``apply_hmr`` output when ``hmr`` is set.
    lamb
        Lambda value used for every step of every batch.
    x0, v0, box
        Initial coordinates, velocities and box handed to the Context.
    bound_potentials
        Bound potentials; the first one is passed to ``get_bond_list``.
    hmr
        When true, apply HMR to the masses and use dt = 2.5e-3 instead of 1.5e-3.
    verbose
        When true, print steps per second and ns per day.
    num_batches, steps_per_batch
        Number of timed batches and steps in each batch.
    compute_du_dl_interval
        Forwarded to ``ctxt.multiple_steps``.
    barostat_interval
        A barostat is only attached when this is greater than 0.
    """
    seed = 1234
    dt = 1.5e-3
    temperature = 300
    pressure = 1.0
    seconds_per_day = 86400

    harmonic_bond_potential = bound_potentials[0]
    bond_list = get_bond_list(harmonic_bond_potential)
    if hmr:
        dt = 2.5e-3
        masses = apply_hmr(masses, bond_list)
    intg = LangevinIntegrator(temperature, dt, 1.0, np.array(masses), seed).impl()

    bps = []

    for potential in bound_potentials:
        bps.append(potential.bound_impl(precision=np.float32))  # get the bound implementation

    # No barostat unless a positive interval was requested.
    baro_impl = None
    if barostat_interval > 0:
        group_idxs = get_group_indices(bond_list)
        baro = MonteCarloBarostat(
            x0.shape[0],
            pressure,
            temperature,
            group_idxs,
            barostat_interval,
            seed,
        )
        baro_impl = baro.impl(bps)

    ctxt = custom_ops.Context(
        x0,
        v0,
        box,
        intg,
        bps,
        barostat=baro_impl,
    )

    batch_times = []

    lambda_schedule = np.ones(steps_per_batch) * lamb

    # run once before timer starts
    ctxt.multiple_steps(lambda_schedule, compute_du_dl_interval)

    start = time.time()

    for batch in range(num_batches):

        # time the current batch
        batch_start = time.time()
        du_dls, _, _ = ctxt.multiple_steps(lambda_schedule, compute_du_dl_interval)
        batch_end = time.time()

        delta = batch_end - batch_start

        batch_times.append(delta)

        # Throughput from the mean batch time so far.
        steps_per_second = steps_per_batch / np.mean(batch_times)
        steps_per_day = steps_per_second * seconds_per_day

        # dt * steps/day is scaled by 1e-3 to report ns/day
        # (so dt is presumably in picoseconds; confirm).
        ps_per_day = dt * steps_per_day
        ns_per_day = ps_per_day * 1e-3

        if verbose:
            print(f"steps per second: {steps_per_second:.3f}")
            print(f"ns per day: {ns_per_day:.3f}")

    # Every coordinate must stay below 1000 in absolute value.
    assert np.all(np.abs(ctxt.get_x_t()) < 1000)

    print(
        f"{label}: N={x0.shape[0]} speed: {ns_per_day:.2f}ns/day dt: {dt*1e3}fs (ran {steps_per_batch * num_batches} steps in {(time.time() - start):.2f}s)"
    )
def test_barostat_partial_group_idxs():
    """Verify that the barostat can handle a subset of the molecules rather than all of them.

    Only checks that the simulation runs; the resulting behavior is not verified.
    """
    temperature = 300.0 * unit.kelvin
    initial_waterbox_width = 3.0 * unit.nanometer
    timestep = 1.5 * unit.femtosecond
    barostat_interval = 3
    collision_rate = 1.0 / unit.picosecond
    seed = 2021
    np.random.seed(seed)

    pressure = 1.0 * unit.atmosphere

    mol_a = hif2a_ligand_pair.mol_a
    ff = hif2a_ligand_pair.ff
    complex_system, complex_coords, complex_box, complex_top = build_water_system(
        initial_waterbox_width.value_in_unit(unit.nanometer)
    )

    min_complex_coords = minimize_host_4d([mol_a], complex_system, complex_coords, ff, complex_box)
    afe = AbsoluteFreeEnergy(mol_a, ff)

    unbound_potentials, sys_params, masses, coords = afe.prepare_host_edge(
        ff.get_ordered_params(), complex_system, min_complex_coords
    )

    # The barostat's molecule list is read off the bond table ...
    all_groups = get_group_indices(get_bond_list(unbound_potentials[0]))
    # ... and only the second half of the groups is kept.
    group_indices = all_groups[len(all_groups) // 2:]

    lam = 1.0

    # Bind every potential first, then build the float32 implementations.
    bound_potentials = [
        unbound_pot.bind(np.asarray(params))
        for params, unbound_pot in zip(sys_params, unbound_potentials)
    ]
    u_impls = [bp.bound_impl(precision=np.float32) for bp in bound_potentials]

    temperature_k = temperature.value_in_unit(unit.kelvin)

    integrator_impl = LangevinIntegrator(
        temperature_k,
        timestep.value_in_unit(unit.picosecond),
        collision_rate.value_in_unit(unit.picosecond**-1),
        masses,
        seed,
    ).impl()

    v_0 = sample_velocities(masses * unit.amu, temperature)

    baro = custom_ops.MonteCarloBarostat(
        coords.shape[0],
        pressure.value_in_unit(unit.bar),
        temperature_k,
        group_indices,
        barostat_interval,
        u_impls,
        seed,
    )

    ctxt = custom_ops.Context(coords, v_0, complex_box, integrator_impl, u_impls, barostat=baro)
    ctxt.multiple_steps(np.ones(1000) * lam)
def test_molecular_ideal_gas():
    """Compare the mean barostat volume against an ideal-gas estimate.

    The nonbonded potential is dropped; for each temperature the mean box
    volume over the second half of the run must match
    ``(n_molecules + 1) * BOLTZ * T / P`` within the relative tolerance.

    References
    ----------
    OpenMM testIdealGas
    https://github.com/openmm/openmm/blob/d8ef57fed6554ec95684e53768188e1f666405c9/tests/TestMonteCarloBarostat.h#L86-L140
    """
    # --- simulation parameters ---
    initial_waterbox_width = 3.0 * unit.nanometer
    timestep = 1.5 * unit.femtosecond
    collision_rate = 1.0 / unit.picosecond
    n_moves = 10000
    barostat_interval = 5
    seed = 2021

    # --- thermodynamic parameters ---
    temperatures = np.array([300, 600, 1000]) * unit.kelvin
    # Very high pressure, so that the expected volume stays small.
    pressure = 100.0 * unit.bar

    # Generate an alchemical system of a waterbox + alchemical ligand:
    # effectively discard ligands by running in AbsoluteFreeEnergy mode at lambda = 1.0
    mol_a = hif2a_ligand_pair.mol_a
    ff = hif2a_ligand_pair.ff
    complex_system, complex_coords, complex_box, complex_top = build_water_system(
        initial_waterbox_width.value_in_unit(unit.nanometer)
    )

    min_complex_coords = minimize_host_4d([mol_a], complex_system, complex_coords, ff, complex_box)
    afe = AbsoluteFreeEnergy(mol_a, ff)

    _unbound_potentials, _sys_params, masses, coords = afe.prepare_host_edge(
        ff.get_ordered_params(), complex_system, min_complex_coords
    )

    # Drop the nonbonded potential (the last entry).
    unbound_potentials = _unbound_potentials[:-1]
    sys_params = _sys_params[:-1]

    # The barostat's molecule list is read off the bond table.
    group_indices = get_group_indices(get_bond_list(unbound_potentials[0]))

    volume_trajs = []

    relative_tolerance = 1e-2
    initial_relative_box_perturbation = 2 * relative_tolerance

    n_molecules = complex_top.getNumResidues()

    # Bind every potential first, then build the float32 implementations.
    bound_potentials = [
        unbound_pot.bind(np.asarray(params))
        for params, unbound_pot in zip(sys_params, unbound_potentials)
    ]
    u_impls = [bp.bound_impl(precision=np.float32) for bp in bound_potentials]

    # --- expected volume ---
    md_pressure_unit = ENERGY_UNIT / DISTANCE_UNIT**3
    pressure_in_md = (pressure * unit.AVOGADRO_CONSTANT_NA).value_in_unit(md_pressure_unit)
    expected_volume_in_md = (n_molecules + 1) * BOLTZ * temperatures.value_in_unit(unit.kelvin) / pressure_in_md

    steps_per_move = np.ones(barostat_interval)

    for i, temperature in enumerate(temperatures):
        temperature_k = temperature.value_in_unit(unit.kelvin)

        # Thermostat for this temperature.
        integrator_impl = LangevinIntegrator(
            temperature_k,
            timestep.value_in_unit(unit.picosecond),
            collision_rate.value_in_unit(unit.picosecond**-1),
            masses,
            seed,
        ).impl()

        v_0 = sample_velocities(masses * unit.amu, temperature)

        # Rescale the box so it starts out near the expected volume,
        # off by the initial relative perturbation.
        rescaler = CentroidRescaler(group_indices)
        initial_volume = compute_box_volume(complex_box)
        initial_center = compute_box_center(complex_box)
        target_volume = (1 + initial_relative_box_perturbation) * expected_volume_in_md[i]
        length_scale = (target_volume / initial_volume)**(1.0 / 3)
        new_coords = rescaler.scale_centroids(coords, initial_center, length_scale)
        new_box = complex_box * length_scale

        baro = custom_ops.MonteCarloBarostat(
            new_coords.shape[0],
            pressure.value_in_unit(unit.bar),
            temperature_k,
            group_indices,
            barostat_interval,
            u_impls,
            seed,
        )

        ctxt = custom_ops.Context(new_coords, v_0, new_box, integrator_impl, u_impls, barostat=baro)

        # Record the box volume after every block of barostat_interval steps.
        vols = []
        for _ in range(n_moves // barostat_interval):
            ctxt.multiple_steps(steps_per_move)
            vols.append(np.linalg.det(ctxt.get_box()))
        volume_trajs.append(vols)

    # Treat the first half of each trajectory as equilibration.
    equil_time = len(volume_trajs[0]) // 2  # TODO: don't hard-code this?
    actual_volume_in_md = np.array([np.mean(traj[equil_time:]) for traj in volume_trajs])

    np.testing.assert_allclose(
        actual=actual_volume_in_md,
        desired=expected_volume_in_md,
        rtol=relative_tolerance,
    )
def test_barostat_varying_pressure():
    """Check that the box shrinks at 1000 atm and grows again after dropping to 1 atm."""
    temperature = 300.0 * unit.kelvin
    initial_waterbox_width = 3.0 * unit.nanometer
    timestep = 1.5 * unit.femtosecond
    barostat_interval = 3
    collision_rate = 1.0 / unit.picosecond
    seed = 2021
    np.random.seed(seed)

    # Begin at a very large pressure.
    pressure = 1000.0 * unit.atmosphere

    mol_a = hif2a_ligand_pair.mol_a
    ff = hif2a_ligand_pair.ff
    complex_system, complex_coords, complex_box, complex_top = build_water_system(
        initial_waterbox_width.value_in_unit(unit.nanometer)
    )

    min_complex_coords = minimize_host_4d([mol_a], complex_system, complex_coords, ff, complex_box)
    afe = AbsoluteFreeEnergy(mol_a, ff)

    unbound_potentials, sys_params, masses, coords = afe.prepare_host_edge(
        ff.get_ordered_params(), complex_system, min_complex_coords
    )

    # The barostat's molecule list is read off the bond table.
    group_indices = get_group_indices(get_bond_list(unbound_potentials[0]))

    lam = 1.0

    # Bind each potential and immediately build its float32 implementation.
    u_impls = [
        unbound_pot.bind(np.asarray(params)).bound_impl(precision=np.float32)
        for params, unbound_pot in zip(sys_params, unbound_potentials)
    ]

    temperature_k = temperature.value_in_unit(unit.kelvin)

    integrator_impl = LangevinIntegrator(
        temperature_k,
        timestep.value_in_unit(unit.picosecond),
        collision_rate.value_in_unit(unit.picosecond**-1),
        masses,
        seed,
    ).impl()

    v_0 = sample_velocities(masses * unit.amu, temperature)

    baro = custom_ops.MonteCarloBarostat(
        coords.shape[0],
        pressure.value_in_unit(unit.bar),
        temperature_k,
        group_indices,
        barostat_interval,
        u_impls,
        seed,
    )

    ctxt = custom_ops.Context(coords, v_0, complex_box, integrator_impl, u_impls, barostat=baro)
    ctxt.multiple_steps(np.ones(1000) * lam)

    high_pressure_vol = compute_box_volume(ctxt.get_box())
    # The barostat is expected to have shrunk the box.
    assert compute_box_volume(complex_box) - high_pressure_vol > 0.4

    # Drop the pressure to 1 atmosphere (expressed in bar).
    baro.set_pressure((1 * unit.atmosphere).value_in_unit(unit.bar))
    # Changing the barostat interval resets the barostat step.
    baro.set_interval(2)

    ctxt.multiple_steps(np.ones(2000) * lam)

    # At the lower pressure the box is expected to grow again.
    low_pressure_box = ctxt.get_box()
    assert compute_box_volume(low_pressure_box) > high_pressure_vol
def test_free_energy_estimator_with_endpoint_correction():
    """
    Test that we generate correctly shaped derivatives in the estimator code
    when the endpoint correction is turned on.

    We expect f([a,b,c,...]) to generate derivatives df/da, df/db, df/dc, df/d...
    such that df/da.shape == a.shape, df/db.shape == b.shape, df/dc == c.shape, etc.
    """
    n_atoms, n_bonds, n_angles, n_restraints = 15, 3, 4, 5

    x0 = np.random.rand(n_atoms, 3)
    v0 = np.zeros_like(x0)

    hb_pot, hb_params = get_harmonic_bond(n_atoms, n_bonds)
    ha_pot, ha_params = get_harmonic_angle(n_atoms, n_angles)
    rs_pot, rs_params = get_harmonic_restraints(n_atoms, n_restraints)

    # The restraint potential goes last.
    unbound_potentials = [hb_pot, ha_pot, rs_pot]
    sys_params = [hb_params, ha_params, rs_params]

    masses = np.random.rand(n_atoms)
    box = np.eye(3, dtype=np.float64)

    seed = 2021
    temperature = 300.0
    pressure = 1.0
    group_idxs = get_group_indices(get_bond_list(hb_pot))

    integrator = LangevinIntegrator(temperature, 1.5e-3, 1.0, masses, seed)
    barostat = MonteCarloBarostat(x0.shape[0], pressure, temperature, group_idxs, 25, seed)

    beta = 0.125
    lambda_schedule = np.linspace(0, 1.0, 4)

    def loss_fn(sys_params):
        # ``client`` is looked up at call time, i.e. it is whichever value the
        # loop below is currently on.
        endpoint_correct = True
        mdl = estimator_abfe.FreeEnergyModel(
            unbound_potentials,
            endpoint_correct,
            client,
            box,
            x0,
            v0,
            integrator,
            barostat,
            lambda_schedule,
            100,
            100,
            beta,
            "test",
        )
        estimate = estimator_abfe.deltaG(mdl, sys_params)
        dG = estimate[0]
        return dG**2

    clients = [None, CUDAPoolClient(1)]
    for client in clients:
        loss_fn(sys_params)
def _futures_a_to_b(self, ff_params, mol_a, mol_b, combined_core_idxs, x0, box0, prefix, seed):
    """Queue the simulate jobs for mol_a -> mol_b with a core restraint.

    A ``HarmonicBond`` restraint over ``combined_core_idxs`` is appended as
    the last potential. One job is queued per lambda window, plus a
    lambda = 1 job without the restraint for the endpoint correction.

    Returns
    -------
    tuple
        ``(sys_params, model, futures)``. When ``self.client`` is ``None``
        the jobs are run inline and wrapped in ``_MockFuture``.
    """
    num_host_atoms = x0.shape[0] - mol_a.GetNumAtoms() - mol_b.GetNumAtoms()

    # (ytz): super ugly, undo combined_core_idxs to get back original idxs
    core_idxs = combined_core_idxs - num_host_atoms
    core_idxs[:, 1] -= mol_a.GetNumAtoms()

    dual_topology = self.setup_topology(mol_a, mol_b)
    rfe = free_energy_rabfe.RelativeFreeEnergy(dual_topology)

    unbound_potentials, sys_params, masses = rfe.prepare_host_edge(ff_params, self.host_system)

    # Core restraint: column 0 of its parameters holds k_core, the rest stays zero.
    k_core = 30.0
    core_params = np.zeros_like(combined_core_idxs).astype(np.float64)
    core_params[:, 0] = k_core

    unbound_potentials.append(potentials.HarmonicBond(combined_core_idxs))
    sys_params.append(core_params)

    # tbd sample from boltzmann distribution later
    v0 = np.zeros_like(x0)

    beta = 1 / (constants.BOLTZ * self.temperature)

    # NOTE(review): core_idxs was shifted back to ligand-local numbering above,
    # yet it is concatenated with the first potential's idxs, which presumably
    # are system-level. Confirm whether combined_core_idxs was intended here.
    bond_array = np.concatenate([unbound_potentials[0].get_idxs(), core_idxs])
    masses = model_utils.apply_hmr(masses, bond_array)

    friction = 1.0
    integrator = LangevinIntegrator(self.temperature, self.dt, friction, masses, seed)

    group_indices = get_group_indices([tuple(pair) for pair in bond_array])
    barostat_interval = 5
    barostat = MonteCarloBarostat(
        x0.shape[0], self.pressure, self.temperature, group_indices, barostat_interval, seed
    )

    endpoint_correct = True
    model = estimator_abfe.FreeEnergyModel(
        unbound_potentials,
        endpoint_correct,
        self.client,
        box0,  # important, use equilibrated box.
        x0,
        v0,
        integrator,
        barostat,
        self.host_schedule,
        self.equil_steps,
        self.prod_steps,
        beta,
        prefix,
    )

    bound_potentials = [
        unbound_pot.bind(np.asarray(params))
        for params, unbound_pot in zip(sys_params, model.unbound_potentials)
    ]

    subsample_interval = 1000

    def job_args(lamb, job_potentials, eval_schedule):
        # Positional argument tuple in the order ``estimator_abfe.simulate`` is called with.
        return (
            lamb,
            model.box,
            model.x0,
            model.v0,
            job_potentials,
            model.integrator,
            model.barostat,
            model.equil_steps,
            model.prod_steps,
            subsample_interval,
            subsample_interval,
            eval_schedule,
        )

    all_args = [
        job_args(lamb, bound_potentials, model.lambda_schedule)
        for lamb in model.lambda_schedule
    ]

    if endpoint_correct:
        assert isinstance(bound_potentials[-1], potentials.HarmonicBond)
        # Extra lambda = 1 window with the restraints stripped out; no Us need
        # to be evaluated for the endpoint correction.
        all_args.append(job_args(1.0, bound_potentials[:-1], []))

    if self.client is None:
        futures = [_MockFuture(estimator_abfe.simulate(*args)) for args in all_args]
    else:
        futures = [self.client.submit(estimator_abfe.simulate, *args) for args in all_args]

    return sys_params, model, futures
def test_barostat_is_deterministic():
    """Verify that the barostat results in the same box size shift after 1000 steps.

    This matters for debugging and for being able to replicate simulations.
    """
    platform_version = get_platform_version()

    lam = 1.0
    temperature = 300.0 * unit.kelvin
    initial_waterbox_width = 3.0 * unit.nanometer
    timestep = 1.5 * unit.femtosecond
    barostat_interval = 3
    collision_rate = 1.0 / unit.picosecond
    seed = 2021
    np.random.seed(seed)

    # OpenEye's AM1 Charging values are OS platform dependent. To ensure that we have deterministic values
    # we check against our two most common OS versions, Ubuntu 18.04 and 20.04.
    box_vol = 26.869380588831582
    lig_charge_vals = np.array(
        [
            1.4572377542719206,
            -0.37011462071257184,
            1.1478267014520305,
            -4.920284514559682,
            0.16985194917937935,
        ]
    )

    if "ubuntu" not in platform_version:
        print(
            f"Test expected to run under ubuntu 20.04 or 18.04, got {platform_version}"
        )
    if "18.04" in platform_version:
        # Reference values recorded for 18.04.
        box_vol = 26.711716908713402
        lig_charge_vals[3] = -4.920166483601927

    pressure = 1.0 * unit.atmosphere

    mol_a = hif2a_ligand_pair.mol_a
    ff = hif2a_ligand_pair.ff
    complex_system, complex_coords, complex_box, complex_top = build_water_system(
        initial_waterbox_width.value_in_unit(unit.nanometer)
    )

    min_complex_coords = minimize_host_4d([mol_a], complex_system, complex_coords, ff, complex_box)
    afe = AbsoluteFreeEnergy(mol_a, ff)

    unbound_potentials, sys_params, masses, coords = afe.prepare_host_edge(
        ff.get_ordered_params(), complex_system, min_complex_coords
    )

    # The barostat's molecule list is read off the bond table.
    group_indices = get_group_indices(get_bond_list(unbound_potentials[0]))

    # Look at the first five ligand atoms and their assigned charges
    # (column 0 of the last parameter set, after the host atoms).
    ligand_charges = sys_params[-1][:, 0][len(min_complex_coords):][:5]
    np.testing.assert_array_almost_equal(lig_charge_vals, ligand_charges, decimal=5)

    # Bind each potential and immediately build its float32 implementation.
    u_impls = [
        unbound_pot.bind(np.asarray(params)).bound_impl(precision=np.float32)
        for params, unbound_pot in zip(sys_params, unbound_potentials)
    ]

    temperature_k = temperature.value_in_unit(unit.kelvin)

    integrator_impl = LangevinIntegrator(
        temperature_k,
        timestep.value_in_unit(unit.picosecond),
        collision_rate.value_in_unit(unit.picosecond**-1),
        masses,
        seed,
    ).impl()

    v_0 = sample_velocities(masses * unit.amu, temperature)

    baro = custom_ops.MonteCarloBarostat(
        coords.shape[0],
        pressure.value_in_unit(unit.bar),
        temperature_k,
        group_indices,
        barostat_interval,
        u_impls,
        seed,
    )

    ctxt = custom_ops.Context(coords, v_0, complex_box, integrator_impl, u_impls, barostat=baro)
    ctxt.multiple_steps(np.ones(1000) * lam)

    final_box = ctxt.get_box()
    np.testing.assert_almost_equal(compute_box_volume(final_box), box_vol, decimal=5)