def simulate_futures(
    self, ff_params, mol, x0, box0, prefix, core_idxs=None, seed=0
) -> Tuple[List[Any], estimator_abfe.FreeEnergyModel, List[Any]]:
    """
    Prepare an absolute free energy model for ``mol`` in the host and launch
    one simulation per lambda window.

    Parameters
    ----------
    ff_params:
        Forcefield parameters forwarded to ``prepare_host_edge``.
    mol:
        Molecule passed to ``self.setup_topology`` and ``AbsoluteFreeEnergy``.
    x0:
        Initial coordinates; initial velocities are zeros of the same shape.
    box0:
        Initial box handed to the free energy model.
    prefix:
        Passed through unchanged to ``FreeEnergyModel``.
    core_idxs:
        Not used by this method.
    seed: int
        Seed for the integrator and barostat. A value of 0 (the default)
        means "draw a random seed".

    Returns
    -------
    tuple (sys_params, model, futures)
        ``futures`` holds one entry per lambda window. When ``self.client`` is
        None the simulations run inline and are wrapped in ``_MockFuture``;
        otherwise they are submitted through the client.
    """
    topo = self.setup_topology(mol)
    abs_fe = free_energy_rabfe.AbsoluteFreeEnergy(mol, topo)
    unbound_potentials, sys_params, masses = abs_fe.prepare_host_edge(ff_params, self.host_system)

    # seed == 0 is the sentinel for "pick a seed for me".
    if seed == 0:
        seed = np.random.randint(np.iinfo(np.int32).max)

    beta = 1 / (constants.BOLTZ * self.temperature)

    bonds = get_bond_list(unbound_potentials[0])
    masses = model_utils.apply_hmr(masses, bonds)

    friction = 1.0
    integrator = LangevinIntegrator(self.temperature, self.dt, friction, masses, seed)

    barostat_interval = 5
    group_indices = get_group_indices(bonds)
    barostat = MonteCarloBarostat(
        x0.shape[0], self.pressure, self.temperature, group_indices, barostat_interval, seed
    )

    # Start from rest.
    v0 = np.zeros_like(x0)

    endpoint_correct = False
    model = estimator_abfe.FreeEnergyModel(
        unbound_potentials,
        endpoint_correct,
        self.client,
        box0,
        x0,
        v0,
        integrator,
        barostat,
        self.host_schedule,
        self.equil_steps,
        self.prod_steps,
        beta,
        prefix,
    )

    bound_potentials = [
        pot.bind(np.asarray(pot_params)) for pot_params, pot in zip(sys_params, model.unbound_potentials)
    ]

    subsample_interval = 1000
    all_args = [
        (
            window,
            model.box,
            model.x0,
            model.v0,
            bound_potentials,
            model.integrator,
            model.barostat,
            model.equil_steps,
            model.prod_steps,
            subsample_interval,
            subsample_interval,
            model.lambda_schedule,
        )
        for window in model.lambda_schedule
    ]

    if endpoint_correct:
        assert isinstance(bound_potentials[-1], potentials.HarmonicBond)
        all_args.append(
            (
                1.0,
                model.box,
                model.x0,
                model.v0,
                bound_potentials[:-1],  # strip out the restraints
                model.integrator,
                model.barostat,
                model.equil_steps,
                model.prod_steps,
                subsample_interval,
                subsample_interval,
                [],  # no need to evaluate Us for the endpoint correction
            )
        )

    if self.client is None:
        # No client: run each window right here and wrap the result.
        futures = [_MockFuture(estimator_abfe.simulate(*sim_args)) for sim_args in all_args]
    else:
        futures = [self.client.submit(estimator_abfe.simulate, *sim_args) for sim_args in all_args]

    return sys_params, model, futures
def _futures_a_to_b(self, ff_params, mol_a, mol_b, combined_core_idxs, x0, box0, prefix, seed):
    """
    Prepare a dual-topology model for ``mol_a``/``mol_b`` with core restraints
    and launch one simulation per lambda window, plus one extra simulation
    for the endpoint correction.

    Parameters
    ----------
    ff_params:
        Forcefield parameters forwarded to ``prepare_host_edge``.
    mol_a, mol_b:
        Molecules passed to ``self.setup_topology``; their atom counts are
        used to recover the number of host atoms from ``x0``.
    combined_core_idxs: np.ndarray
        Two-column index array used directly as the ``HarmonicBond``
        restraint indices (not modified by this method).
    x0:
        Initial coordinates; initial velocities are zeros of the same shape.
    box0:
        Initial box handed to the free energy model.
    prefix:
        Passed through unchanged to ``FreeEnergyModel``.
    seed: int
        Seed for the integrator and barostat.

    Returns
    -------
    tuple (sys_params, model, futures)
        ``sys_params`` includes the appended restraint parameters. ``futures``
        holds one entry per lambda window followed by the endpoint-correction
        run. When ``self.client`` is None the simulations run inline and are
        wrapped in ``_MockFuture``.
    """
    num_host_atoms = x0.shape[0] - mol_a.GetNumAtoms() - mol_b.GetNumAtoms()

    # (ytz): super ugly, undo combined_core_idxs to get back original idxs
    core_idxs = combined_core_idxs - num_host_atoms  # subtraction yields a new array
    core_idxs[:, 1] -= mol_a.GetNumAtoms()

    dual_topology = self.setup_topology(mol_a, mol_b)
    rfe = free_energy_rabfe.RelativeFreeEnergy(dual_topology)
    unbound_potentials, sys_params, masses = rfe.prepare_host_edge(ff_params, self.host_system)

    # Restraint parameters: column 0 carries the force constant, the
    # remaining column is left at zero.
    k_core = 30.0
    core_params = np.zeros_like(combined_core_idxs).astype(np.float64)
    core_params[:, 0] = k_core

    restraint_potential = potentials.HarmonicBond(combined_core_idxs)
    unbound_potentials.append(restraint_potential)
    sys_params.append(core_params)

    # tbd sample from boltzmann distribution later
    v0 = np.zeros_like(x0)

    beta = 1 / (constants.BOLTZ * self.temperature)

    # NOTE(review): core_idxs has the host and mol_a offsets removed, while
    # the restraint itself uses combined_core_idxs -- confirm these are the
    # intended indices for HMR and barostat grouping.
    bond_list = np.concatenate([unbound_potentials[0].get_idxs(), core_idxs])
    masses = model_utils.apply_hmr(masses, bond_list)

    friction = 1.0
    integrator = LangevinIntegrator(self.temperature, self.dt, friction, masses, seed)

    bond_list = list(map(tuple, bond_list))
    group_indices = get_group_indices(bond_list)
    barostat_interval = 5
    barostat = MonteCarloBarostat(
        x0.shape[0], self.pressure, self.temperature, group_indices, barostat_interval, seed
    )

    endpoint_correct = True
    model = estimator_abfe.FreeEnergyModel(
        unbound_potentials,
        endpoint_correct,
        self.client,
        box0,  # important, use equilibrated box.
        x0,
        v0,
        integrator,
        barostat,
        self.host_schedule,
        self.equil_steps,
        self.prod_steps,
        beta,
        prefix,
    )

    bound_potentials = [
        unbound_pot.bind(np.asarray(params))
        for params, unbound_pot in zip(sys_params, model.unbound_potentials)
    ]

    # Defined before the loop: the endpoint-correction entry below needs it
    # even when the lambda schedule is empty.
    subsample_interval = 1000

    all_args = [
        (
            lamb,
            model.box,
            model.x0,
            model.v0,
            bound_potentials,
            model.integrator,
            model.barostat,
            model.equil_steps,
            model.prod_steps,
            subsample_interval,
            subsample_interval,
            model.lambda_schedule,
        )
        for lamb in model.lambda_schedule
    ]

    if endpoint_correct:
        # The restraint was appended last above; the endpoint run drops it.
        assert isinstance(bound_potentials[-1], potentials.HarmonicBond)
        all_args.append(
            (
                1.0,
                model.box,
                model.x0,
                model.v0,
                bound_potentials[:-1],  # strip out the restraints
                model.integrator,
                model.barostat,
                model.equil_steps,
                model.prod_steps,
                subsample_interval,
                subsample_interval,
                [],  # no need to evaluate Us for the endpoint correction
            )
        )

    if self.client is None:
        # No client: run each simulation inline and wrap the result.
        futures = [_MockFuture(estimator_abfe.simulate(*args)) for args in all_args]
    else:
        futures = [self.client.submit(estimator_abfe.simulate, *args) for args in all_args]

    return sys_params, model, futures
def equilibrate_host(
    mol: Chem.Mol,
    host_system: openmm.System,
    host_coords: NDArray,
    temperature: float,
    pressure: float,
    ff: Forcefield,
    box: NDArray,
    n_steps: int,
    seed: Optional[int] = None,
) -> Tuple[NDArray, NDArray]:
    """
    Equilibrate a host around a reference molecule with a MonteCarloBarostat.

    Handy when one host is reused for many FEP calculations that share the
    same reference molecule, i.e. a star map.

    Steps performed:

    - Minimize host with rigid mol
    - Minimize host and mol
    - Run n_steps with HMR enabled and MonteCarloBarostat every 5 steps

    Parameters
    ----------
    mol: Chem.Mol
        Ligand the host is equilibrated with.

    host_system: openmm.System
        OpenMM System describing the host.

    host_coords: np.ndarray
        N x 3 host coordinates, in nanometers.

    temperature: float
        Simulation temperature, in kelvins.

    pressure: float
        Simulation pressure, in bars.

    ff: ff.Forcefield
        Wrapper class around a list of handlers.

    box: np.ndarray [3,3]
        Periodic box matrix, in nanometers.

    n_steps: int
        How many steps to simulate.

    seed: int or None
        Seed for the integrator and barostat; drawn at random when None.

    Returns
    -------
    tuple (coords, box)
        Coordinates of the equilibrated system and its box.
    """
    # insert mol into the binding pocket.
    host_bps, host_masses = openmm_deserializer.deserialize_system(host_system, cutoff=1.2)
    min_host_coords = minimize_host_4d([mol], host_system, host_coords, ff, box)

    ligand_masses = [atom.GetMass() for atom in mol.GetAtoms()]
    ligand_coords = get_romol_conf(mol)

    # Host entries come first, ligand entries are appended after them.
    all_masses = np.concatenate([host_masses, ligand_masses])
    all_coords = np.concatenate([min_host_coords, ligand_coords])

    guest_top = topology.BaseTopology(mol, ff)
    hgt = topology.HostGuestTopology(host_bps, guest_top)

    # Each parameterize function paired with the handlers whose params it takes.
    parameterizers = (
        (hgt.parameterize_harmonic_bond, (ff.hb_handle,)),
        (hgt.parameterize_harmonic_angle, (ff.ha_handle,)),
        (hgt.parameterize_periodic_torsion, (ff.pt_handle, ff.it_handle)),
        (hgt.parameterize_nonbonded, (ff.q_handle, ff.lj_handle)),
    )

    bound_potentials = []
    u_impls = []
    for parameterize, handles in parameterizers:
        handle_params = [handle.params for handle in handles]
        params, potential = parameterize(*handle_params)
        bound = potential.bind(params)
        bound_potentials.append(bound)
        u_impls.append(bound.bound_impl(precision=np.float32))

    # The harmonic bond potential was parameterized first.
    bond_list = get_bond_list(bound_potentials[0])
    all_masses = model_utils.apply_hmr(all_masses, bond_list)

    if seed is None:
        seed = np.random.randint(np.iinfo(np.int32).max)

    dt = 2.5e-3
    friction = 1.0
    integrator = LangevinIntegrator(temperature, dt, friction, all_masses, seed).impl()

    x0 = all_coords
    v0 = np.zeros_like(x0)

    barostat_interval = 5
    group_indices = get_group_indices(bond_list)
    barostat = MonteCarloBarostat(
        x0.shape[0], pressure, temperature, group_indices, barostat_interval, seed
    ).impl(u_impls)

    # Re-minimize with the mol being flexible
    x0 = fire_minimize(x0, u_impls, box, np.ones(50))

    # context components: positions, velocities, box, integrator, energy fxns
    ctxt = custom_ops.Context(x0, v0, box, integrator, u_impls, barostat)

    # Every step runs at lambda = 0.0.
    equil_schedule = np.linspace(0.0, 0.0, n_steps)
    ctxt.multiple_steps(equil_schedule)

    return ctxt.get_x_t(), ctxt.get_box()
def equilibrate_edges(
    self,
    edges: List[Tuple[Chem.Mol, Chem.Mol, np.ndarray]],
    lamb: float = 0.0,
    barostat_interval: int = 10,
    equilibration_steps: int = 100000,
    cache_path: str = "equilibration_cache.pkl",
):
    """
    Pre equilibrate edges and cache them for later use in predictions.

    Equilibrations are submitted through the model client. Does nothing when
    ``self.pre_equilibrate`` is falsy. If ``cache_path`` points at an existing
    file, the cache is loaded from it and no simulation is run.

    Parameters
    ----------
    edges: List of tuples with mol_a, mol_b, core
        Edges to equilibrate

    lamb: float
        Lambda value to equilibrate at. Uses Dual Topology to equilibrate

    barostat_interval: int
        Interval on which to run barostat during equilibration

    equilibration_steps: int
        Number of steps to equilibrate the edge for

    cache_path: string
        Path to look for existing cache or path to where to save cache. By default
        it will write out a pickle file in the local directory. A falsy value
        (None or "") disables both loading and saving.
    """
    if not self.pre_equilibrate:
        return

    # Guard on cache_path first so that None/"" disables caching instead of
    # failing in os.path.isfile; this mirrors the guard on the save below.
    if cache_path and os.path.isfile(cache_path):
        # NOTE(review): `load` looks like pickle.load (the cache is described
        # as a pickle file) -- only point cache_path at trusted files.
        with open(cache_path, "rb") as ifs:
            self._equil_cache = load(ifs)
        print("Loaded Pre-equilibrated structures from cache")
        return

    futures = []
    ordered_params = self.ff.get_ordered_params()

    temperature = 300.0
    pressure = 1.0

    # Run all complex legs first then solvent, as they will likely take longer than the solvent leg
    for stage, host_system, host_coords, host_box in [
        ("complex", self.complex_system, self.complex_coords, self.complex_box),
        ("solvent", self.solvent_system, self.solvent_coords, self.solvent_box),
    ]:
        for mol_a, mol_b, core in edges:
            # Use DualTopology to ensure mols exist in the same space.
            topo = topology.DualTopologyMinimization(mol_a, mol_b, self.ff)
            rfe = free_energy.RelativeFreeEnergy(topo)
            min_coords = minimizer.minimize_host_4d(
                [mol_a, mol_b], host_system, host_coords, self.ff, host_box
            )
            unbound_potentials, sys_params, masses, coords = rfe.prepare_host_edge(
                ordered_params, host_system, min_coords
            )

            harmonic_bond_potential = unbound_potentials[0]
            bond_list = get_bond_list(harmonic_bond_potential)
            group_idxs = get_group_indices(bond_list)

            # HMR allows the larger time step.
            time_step = 1.5e-3
            if self.hmr:
                masses = apply_hmr(masses, bond_list)
                time_step = 2.5e-3

            integrator = LangevinIntegrator(temperature, time_step, 1.0, masses, 0)
            barostat = MonteCarloBarostat(
                coords.shape[0], pressure, temperature, group_idxs, barostat_interval, 0
            )

            pots = [bp.bind(np.asarray(params)) for bp, params in zip(unbound_potentials, sys_params)]

            # NOTE(review): the client is used unconditionally here -- confirm
            # self.client can never be None when pre_equilibrate is set.
            future = self.client.submit(
                estimator.equilibrate,
                integrator,
                barostat,
                pots,
                coords,
                host_box,
                lamb,
                equilibration_steps,
            )
            futures.append((stage, (mol_a, mol_b, core), future))

    # Collect results in submission order; future.result() blocks until ready.
    num_equil = len(futures)
    for i, (stage, edge, future) in enumerate(futures):
        edge_hash = self._edge_hash(stage, *edge)
        self._equil_cache[edge_hash] = future.result()
        if (i + 1) % 5 == 0:
            print(f"Pre-equilibrated {i+1} of {num_equil} edges")
    print(f"Pre-equilibrated {num_equil} edges")

    if cache_path:
        with open(cache_path, "wb") as ofs:
            dump(self._equil_cache, ofs)
        print(f"Saved equilibration_cache to {cache_path}")
def predict(self, ff_params: list, mol_a: Chem.Mol, mol_b: Chem.Mol, core: np.ndarray):
    """
    Predict the ddG of morphing mol_a into mol_b. This function is differentiable w.r.t. ff_params.

    The complex and solvent legs are run in that order and the prediction is
    the complex dG minus the solvent dG.

    Parameters
    ----------

    ff_params: list of np.ndarray
        This should match the ordered params returned by the forcefield

    mol_a: Chem.Mol
        Starting molecule corresponding to lambda = 0

    mol_b: Chem.Mol
        Starting molecule corresponding to lambda = 1

    core: np.ndarray
        N x 2 list of ints corresponding to the atom mapping of the core.

    Returns
    -------
    float
        delta delta G in kJ/mol
    aux
        list of (stage, TI results) pairs, one per leg
    """
    # Fixed simulation settings shared by both legs.
    seed = 0
    temperature = 300.0
    pressure = 1.0

    legs = (
        ("complex", self.complex_system, self.complex_coords, self.complex_box, self.complex_schedule),
        ("solvent", self.solvent_system, self.solvent_coords, self.solvent_box, self.solvent_schedule),
    )

    leg_dGs = []
    leg_results = []

    for stage, host_system, host_coords, host_box, lambda_schedule in legs:
        single_topology = topology.SingleTopology(mol_a, mol_b, core, self.ff)
        rfe = free_energy.RelativeFreeEnergy(single_topology)

        edge_hash = self._edge_hash(stage, mol_a, mol_b, core)
        have_cached_state = self.pre_equilibrate and edge_hash in self._equil_cache

        if have_cached_state:
            cached_state = self._equil_cache[edge_hash]
            x0 = cached_state.coords
            host_box = cached_state.box
            num_host_coords = len(host_coords)
            unbound_potentials, sys_params, masses, _ = rfe.prepare_host_edge(
                ff_params, host_system, host_coords
            )
            mol_a_size = mol_a.GetNumAtoms()

            # Use Dual Topology to pre equilibrate, so have to get the mean of the two sets of mol,
            # normally done within prepare_host_edge, but the whole system has moved by this stage
            ligand_start = num_host_coords
            ligand_split = num_host_coords + mol_a_size
            host_part = x0[:ligand_start]
            interpolated = single_topology.interpolate_params(
                x0[ligand_start:ligand_split], x0[ligand_split:]
            )
            x0 = np.concatenate([host_part, np.mean(interpolated, axis=0)])
        else:
            if self.pre_equilibrate:
                print("Edge not correctly pre-equilibrated, ensure equilibrate_edges was called")
            print(f"Minimizing the {stage} host structure to remove clashes.")
            # (ytz): this isn't strictly symmetric, and we should modify minimize later on remove
            # the hysteresis by jointly minimizing against a and b at the same time. We may also want
            # to remove the randomness completely from the minimization.
            min_host_coords = minimizer.minimize_host_4d(
                [mol_a, mol_b], host_system, host_coords, self.ff, host_box
            )
            unbound_potentials, sys_params, masses, x0 = rfe.prepare_host_edge(
                ff_params, host_system, min_host_coords
            )

        # Start from rest.
        v0 = np.zeros_like(x0)

        bond_list = get_bond_list(unbound_potentials[0])

        # HMR allows the larger time step.
        time_step = 1.5e-3
        if self.hmr:
            masses = apply_hmr(masses, bond_list)
            time_step = 2.5e-3

        group_idxs = get_group_indices(bond_list)

        integrator = LangevinIntegrator(temperature, time_step, 1.0, masses, seed)
        barostat = MonteCarloBarostat(
            x0.shape[0], pressure, temperature, group_idxs, self.barostat_interval, seed
        )

        model = estimator.FreeEnergyModel(
            unbound_potentials,
            self.client,
            host_box,
            x0,
            v0,
            integrator,
            lambda_schedule,
            self.equil_steps,
            self.prod_steps,
            barostat,
        )

        dG, results = estimator.deltaG(model, sys_params)
        leg_dGs.append(dG)
        leg_results.append((stage, results))

    complex_dG, solvent_dG = leg_dGs
    return complex_dG - solvent_dG, leg_results
def benchmark(
    label,
    masses,
    lamb,
    x0,
    v0,
    box,
    bound_potentials,
    hmr=False,
    verbose=True,
    num_batches=100,
    steps_per_batch=1000,
    compute_du_dl_interval=0,
    barostat_interval=0,
):
    """
    Time batches of MD steps at a fixed lambda and print the throughput.

    One untimed warm-up batch is run first. After each timed batch the running
    average speed is recomputed (and printed when ``verbose``); a one-line
    summary is printed at the end.

    TODO: configuration blob containing num_batches, steps_per_batch, and any other options

    Parameters
    ----------
    label:
        Name printed in the summary line.
    masses:
        Particle masses; repartitioned via ``apply_hmr`` when ``hmr`` is set.
    lamb:
        Lambda value used for every step.
    x0, v0, box:
        Initial coordinates, velocities and box handed to the context.
    bound_potentials:
        Bound potentials to simulate; the first one is used to derive the
        bond list.
    hmr: bool
        Apply HMR and use the larger time step (2.5e-3 instead of 1.5e-3).
    verbose: bool
        Print the running speed after every batch.
    num_batches: int
        Number of timed batches.
    steps_per_batch: int
        Steps per batch.
    compute_du_dl_interval: int
        Forwarded to ``multiple_steps``.
    barostat_interval: int
        A barostat is attached only when this is greater than 0.

    Raises
    ------
    AssertionError
        If any final coordinate has absolute value >= 1000.
    """
    seed = 1234
    dt = 1.5e-3
    temperature = 300
    pressure = 1.0
    seconds_per_day = 86400

    harmonic_bond_potential = bound_potentials[0]
    bond_list = get_bond_list(harmonic_bond_potential)

    if hmr:
        dt = 2.5e-3
        masses = apply_hmr(masses, bond_list)

    intg = LangevinIntegrator(temperature, dt, 1.0, np.array(masses), seed).impl()

    # get the bound implementations
    bps = [potential.bound_impl(precision=np.float32) for potential in bound_potentials]

    baro_impl = None
    if barostat_interval > 0:
        group_idxs = get_group_indices(bond_list)
        baro = MonteCarloBarostat(
            x0.shape[0],
            pressure,
            temperature,
            group_idxs,
            barostat_interval,
            seed,
        )
        baro_impl = baro.impl(bps)

    ctxt = custom_ops.Context(
        x0,
        v0,
        box,
        intg,
        bps,
        barostat=baro_impl,
    )

    batch_times = []
    lambda_schedule = np.ones(steps_per_batch) * lamb

    # run once before timer starts
    ctxt.multiple_steps(lambda_schedule, compute_du_dl_interval)

    # Stays NaN if no timed batch runs, so the summary line can still be
    # printed when num_batches == 0.
    ns_per_day = float("nan")

    # perf_counter is monotonic, so timings are immune to wall-clock changes.
    start = time.perf_counter()

    for _ in range(num_batches):
        # time the current batch
        batch_start = time.perf_counter()
        ctxt.multiple_steps(lambda_schedule, compute_du_dl_interval)
        batch_times.append(time.perf_counter() - batch_start)

        # Running average over all batches timed so far.
        steps_per_second = steps_per_batch / np.mean(batch_times)
        steps_per_day = steps_per_second * seconds_per_day

        ps_per_day = dt * steps_per_day
        ns_per_day = ps_per_day * 1e-3

        if verbose:
            print(f"steps per second: {steps_per_second:.3f}")
            print(f"ns per day: {ns_per_day:.3f}")

    # Sanity check that the simulation did not blow up.
    assert np.all(np.abs(ctxt.get_x_t()) < 1000)

    print(
        f"{label}: N={x0.shape[0]} speed: {ns_per_day:.2f}ns/day dt: {dt*1e3}fs (ran {steps_per_batch * num_batches} steps in {(time.perf_counter() - start):.2f}s)"
    )