def vacuum_model(ff_params):
    """Run the free-energy estimator on the vacuum edge for ``ff_params``.

    Prepares the vacuum edge via ``rfe.prepare_vacuum_edge``, wires a Langevin
    integrator and a Monte Carlo barostat into an ``estimator.FreeEnergyModel``
    and returns the first element of ``estimator.deltaG``.

    ``rfe``, ``seed``, ``lambda_schedule``, ``equil_steps`` and ``prod_steps``
    are not defined in this function; they are expected to come from the
    enclosing scope.

    Parameters
    ----------
    ff_params
        Forcefield parameters forwarded to ``rfe.prepare_vacuum_edge``.

    Returns
    -------
    The first element of the value returned by ``estimator.deltaG``.
    """
    unbound_potentials, sys_params, masses, coords = rfe.prepare_vacuum_edge(ff_params)

    # The starting state and the pool client are set up exactly once; the
    # previous version repeated these three statements and therefore built a
    # second CUDAPoolClient whose predecessor was immediately discarded.
    x0 = coords
    v0 = np.zeros_like(coords)
    client = CUDAPoolClient(1)

    # Diagonal box matrix with 100 on the diagonal.
    box = np.eye(3, dtype=np.float64) * 100

    # The first unbound potential is treated as the harmonic bond term; its
    # bond list defines the groups handed to the barostat.
    harmonic_bond_potential = unbound_potentials[0]
    group_idxs = get_group_indices(get_bond_list(harmonic_bond_potential))

    temperature = 300.0
    pressure = 1.0

    integrator = LangevinIntegrator(temperature, 1.5e-3, 1.0, masses, seed)
    barostat = MonteCarloBarostat(x0.shape[0], pressure, temperature, group_idxs, 25, seed)

    model = estimator.FreeEnergyModel(
        unbound_potentials,
        client,
        box,
        x0,
        v0,
        integrator,
        lambda_schedule,
        equil_steps,
        prod_steps,
        barostat,
    )
    return estimator.deltaG(model, sys_params)[0]
def test_free_energy_estimator():
    """Smoke-test ``estimator_abfe.deltaG`` with the endpoint correction off.

    A small random system with harmonic bond and angle terms is pushed through
    ``estimator_abfe.FreeEnergyModel`` twice: once with no client and once
    with a ``CUDAPoolClient``. The test makes no explicit assertions; it
    passes when both evaluations complete without raising.
    """
    n_atoms = 5
    x0 = np.random.rand(n_atoms, 3)
    v0 = np.zeros_like(x0)

    n_bonds = 3
    n_angles = 4
    hb_pot, hb_params = get_harmonic_bond(n_atoms, n_bonds)
    ha_pot, ha_params = get_harmonic_angle(n_atoms, n_angles)

    unbound_potentials = [hb_pot, ha_pot]
    sys_params = [hb_params, ha_params]

    masses = np.random.rand(n_atoms)
    box = np.eye(3, dtype=np.float64)

    seed = 2021
    temperature = 300.0
    pressure = 1.0
    bond_groups = get_group_indices(get_bond_list(hb_pot))

    integrator = LangevinIntegrator(temperature, 1.5e-3, 1.0, masses, seed)
    barostat = MonteCarloBarostat(
        x0.shape[0], pressure, temperature, bond_groups, 25, seed
    )

    beta = 0.125
    lambda_schedule = np.linspace(0, 1.0, 4)

    def loss_fn(sys_params):
        # `client` is resolved at call time from the loop below.
        model = estimator_abfe.FreeEnergyModel(
            unbound_potentials,
            False,  # endpoint_correct
            client,
            box,
            x0,
            v0,
            integrator,
            barostat,
            lambda_schedule,
            100,
            100,
            beta,
            "test",
        )
        dG, _bar_dG_err, _results = estimator_abfe.deltaG(model, sys_params)
        return dG**2

    clients = [None, CUDAPoolClient(1)]
    for client in clients:
        loss_fn(sys_params)
def _get_integrator(combined_masses):
    """
    Build a Langevin integrator with a randomly drawn seed.

    The resulting impl must be bound to a python handle whose lifetime is
    concurrent with that of the context.
    """
    seed_upper_bound = np.iinfo(np.int32).max
    random_seed = np.random.randint(seed_upper_bound)
    return LangevinIntegrator(300.0, 1.5e-3, 1.0, combined_masses, random_seed)
def do_deletion(
    x0,
    v0,
    combined_bps,
    combined_masses,
    box,
    guest_name,
    leg_type,
    u_impls,
    deletion_steps,
):
    """Run the lambda schedule from MIN_LAMBDA to DELETION_MAX_LAMBDA and integrate the work.

    A fresh context is built from the given state, the whole deletion
    schedule is run through ``ctxt.multiple_steps`` while collecting du/dl,
    one more step is taken at the final lambda, and that step is reported.

    Returns
    -------
    The trapezoidal integral of du/dl over the schedule, or ``None`` when
    ``report.too_much_force`` fires or either du/dl endpoint exceeds 0.001
    in magnitude.
    """
    seed = 2021
    # Keep the integrator impl bound to a local for as long as the context lives.
    intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, combined_masses, seed).impl()
    ctxt = custom_ops.Context(x0, v0, box, intg, u_impls)

    schedule = np.linspace(MIN_LAMBDA, DELETION_MAX_LAMBDA, deletion_steps)

    subsample_freq = 1
    full_du_dls, _, _ = ctxt.multiple_steps(schedule, subsample_freq)

    last_step = len(schedule) - 1
    last_lambda = schedule[-1]
    ctxt.step(last_lambda)

    stage_label = f"{leg_type.upper()}_DELETION"
    report.report_step(
        ctxt,
        last_step,
        last_lambda,
        box,
        combined_bps,
        u_impls,
        guest_name,
        deletion_steps,
        stage_label,
    )

    if report.too_much_force(ctxt, last_lambda, box, combined_bps, u_impls):
        print("Not calculating work (too much force)")
        return None

    # Note: this condition only applies for ABFE, not RBFE
    if abs(full_du_dls[0]) > 0.001 or abs(full_du_dls[-1]) > 0.001:
        print("Not calculating work (du_dl endpoints are not ~0)")
        return None

    sampled_lambdas = schedule[::subsample_freq]
    work = np.trapz(full_du_dls, sampled_lambdas)
    print(f"guest_name: {guest_name}\t{leg_type}_work: {work:.2f}")
    return work
def do_deletion(x0, v0, combined_bps, combined_masses, box, guest_name, leg_type):
    """Step through the deletion schedule and print the resulting work.

    Bound impls are created for every potential in ``combined_bps``, a context
    is built, and the schedule from MIN_LAMBDA to DELETION_MAX_LAMBDA over
    TRANSITION_STEPS steps is run one step at a time. du/dl is collected by an
    observable every ``subsample_freq`` steps.

    The function always returns ``None``; the work is only printed. Nothing is
    printed when ``report.too_much_force`` fires (the function returns
    immediately), and the work line is replaced by an error line when either
    du/dl endpoint exceeds 0.001 in magnitude.
    """
    seed = 2021
    # Keep the integrator impl bound to a local for as long as the context lives.
    intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, combined_masses, seed).impl()

    u_impls = [bp.bound_impl(precision=np.float32) for bp in combined_bps]
    ctxt = custom_ops.Context(x0, v0, box, intg, u_impls)

    # collect a du_dl calculation once every other step
    subsample_freq = 2
    du_dl_obs = custom_ops.FullPartialUPartialLambda(u_impls, subsample_freq)
    ctxt.add_observable(du_dl_obs)

    deletion_lambda_schedule = np.linspace(
        MIN_LAMBDA, DELETION_MAX_LAMBDA, TRANSITION_STEPS
    )

    # Forces are only inspected at the start, the midpoint and the end.
    force_check_steps = (0, int(TRANSITION_STEPS / 2), TRANSITION_STEPS - 1)

    for step, lamb in enumerate(deletion_lambda_schedule):
        ctxt.step(lamb)
        if step % 100 == 0:
            report.report_step(
                ctxt,
                step,
                lamb,
                box,
                combined_bps,
                u_impls,
                guest_name,
                TRANSITION_STEPS,
                f"{leg_type.upper()}_DELETION",
            )
        if step in force_check_steps:
            if report.too_much_force(ctxt, lamb, box, combined_bps, u_impls):
                # Abort outright; no work is computed or printed.
                return

    # Read the observable once instead of once per use.
    full_du_dls = du_dl_obs.full_du_dl()

    # Note: this condition only applies for ABFE, not RBFE
    if abs(full_du_dls[0]) > 0.001 or abs(full_du_dls[-1]) > 0.001:
        print("Error: du_dl endpoints are not ~0")
        return

    work = np.trapz(full_du_dls, deletion_lambda_schedule[::subsample_freq])
    print(f"guest_name: {guest_name}\t{leg_type}_work: {work:.2f}")
def binding_model(ff_params):
    """Return the complex-minus-solvent free energy for ``ff_params``.

    For each of the complex and solvent hosts (in that order) the host is
    minimized around ``mol_a``, a host edge is prepared, and a free energy is
    estimated. The host systems, ``mol_a``, ``ff``, ``rfe``, ``seed``,
    ``lambda_schedule``, ``equil_steps`` and ``prod_steps`` are taken from the
    enclosing scope.
    """
    legs = [
        (complex_system, complex_coords, complex_box),
        (solvent_system, solvent_coords, solvent_box),
    ]

    leg_dGs = []
    for host_system, host_coords, host_box in legs:
        # minimize the host to avoid clashes
        host_coords = minimizer.minimize_host_4d(
            [mol_a], host_system, host_coords, ff, host_box
        )

        unbound_potentials, sys_params, masses, coords = rfe.prepare_host_edge(
            ff_params, host_system, host_coords
        )

        x0 = coords
        v0 = np.zeros_like(coords)
        client = CUDAPoolClient(1)

        # The first unbound potential is used as the harmonic bond term.
        bond_potential = unbound_potentials[0]
        group_idxs = get_group_indices(get_bond_list(bond_potential))

        temperature = 300.0
        pressure = 1.0
        integrator = LangevinIntegrator(temperature, 1.5e-3, 1.0, masses, seed)
        barostat = MonteCarloBarostat(
            x0.shape[0], pressure, temperature, group_idxs, 25, seed
        )

        model = estimator.FreeEnergyModel(
            unbound_potentials,
            client,
            host_box,
            x0,
            v0,
            integrator,
            lambda_schedule,
            equil_steps,
            prod_steps,
            barostat,
        )

        leg_dG, _ = estimator.deltaG(model, sys_params)
        leg_dGs.append(leg_dG)

    complex_dG, solvent_dG = leg_dGs
    return complex_dG - solvent_dG
def do_switch(
    x0,
    v0,
    combined_bps,
    combined_masses,
    box,
    guest_name,
    leg_type,
    u_impls,
    transition_steps,
):
    """Run the lambda schedule from MIN_LAMBDA to MAX_LAMBDA and integrate the work.

    The full schedule is run through ``ctxt.multiple_steps`` while collecting
    du/dl, one more step is taken at the final lambda, and that step is
    reported.

    Returns
    -------
    The trapezoidal integral of du/dl over the schedule, or ``None`` when
    ``report.too_much_force`` fires.
    """
    seed = 2021
    # Keep the integrator impl bound to a local for as long as the context lives.
    intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, combined_masses, seed).impl()
    ctxt = custom_ops.Context(x0, v0, box, intg, u_impls)

    schedule = np.linspace(MIN_LAMBDA, MAX_LAMBDA, transition_steps)

    subsample_interval = 1
    full_du_dls, _, _ = ctxt.multiple_steps(schedule, subsample_interval)

    final_step = len(schedule) - 1
    final_lambda = schedule[-1]
    ctxt.step(final_lambda)

    stage_label = f"{leg_type.upper()}_SWITCH"
    report.report_step(
        ctxt,
        final_step,
        final_lambda,
        box,
        combined_bps,
        u_impls,
        guest_name,
        transition_steps,
        stage_label,
    )

    if report.too_much_force(ctxt, final_lambda, box, combined_bps, u_impls):
        return None

    sampled_lambdas = schedule[::subsample_interval]
    work = np.trapz(full_du_dls, sampled_lambdas)
    print(f"guest_name: {guest_name}\t{leg_type}_work: {work:.2f}")
    return work
def minimize(args):
    """Step a system along a lambda schedule from 0.35 down to 0.0 and return the final coordinates.

    Parameters
    ----------
    args
        A 4-tuple ``(bound_potentials, masses, x0, box)``.

    Returns
    -------
    The value of ``ctxt.get_x_t()`` after 500 steps.
    """
    bound_potentials, masses, x0, box = args

    u_impls = [bp.bound_impl(precision=np.float32) for bp in bound_potentials]

    max_seed = np.iinfo(np.int32).max
    seed = np.random.randint(max_seed)
    # Keep the integrator impl bound to a local for as long as the context lives.
    intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, masses, seed).impl()

    v0 = np.zeros_like(x0)
    ctxt = custom_ops.Context(x0, v0, box, intg, u_impls)

    for lamb in np.linspace(0.35, 0.0, 500):
        ctxt.step(lamb)

    return ctxt.get_x_t()
def pose_dock(
    guests_sdfile,
    host_pdbfile,
    transition_type,
    n_steps,
    transition_steps,
    max_lambda,
    outdir,
    random_rotation=False,
    constant_atoms=None,
):
    """Runs short simulations in which the guests phase in or out over time

    Parameters
    ----------

    guests_sdfile: path to input sdf with guests to pose/dock
    host_pdbfile: path to host pdb file to dock into
    transition_type: "insertion" or "deletion"
    n_steps: how many total steps of simulation to do (recommended: <= 1000)
    transition_steps: how many steps to insert/delete the guest over (recommended: <= 500)
        (must be <= n_steps)
    max_lambda: lambda value the guest should insert from or delete to
        (recommended: 1.0 for work calculation, 0.25 to stay close to original pose)
        (must be =1 for work calculation to be applicable)
    outdir: where to write output (will be created if it does not already exist)
    random_rotation: whether to apply a random rotation to each guest before inserting
    constant_atoms: atom numbers from the host_pdbfile to hold mostly fixed across the simulation
        (1-indexed, like PDB files). ``None`` (the default) means no atoms are held.

    Output
    ------

    A pdb & sdf file every 100 steps (outdir/<guest_name>_<step>.pdb)
    stdout every 100 steps noting the step number, lambda value, and energy
    stdout for each guest noting the work of transition
    stdout for each guest noting how long it took to run

    Note
    ----
    If any norm of force per atom exceeds 20000 kJ/(mol*nm) [MAX_NORM_FORCE defined in docking/report.py],
    the simulation for that guest will stop and the work will not be calculated.
    """
    # Avoid a shared mutable default argument.
    if constant_atoms is None:
        constant_atoms = []

    assert transition_steps <= n_steps
    assert transition_type in ("insertion", "deletion")
    if random_rotation:
        assert transition_type == "insertion"

    if not os.path.exists(outdir):
        os.makedirs(outdir)

    host_mol = Chem.MolFromPDBFile(host_pdbfile, removeHs=False)
    amber_ff = app.ForceField("amber99sbildn.xml", "tip3p.xml")
    host_file = PDBFile(host_pdbfile)
    host_system = amber_ff.createSystem(
        host_file.topology,
        nonbondedMethod=app.NoCutoff,
        constraints=None,
        rigidWater=False,
    )
    host_conf = np.array(
        [
            [to_md_units(x), to_md_units(y), to_md_units(z)]
            for x, y, z in host_file.positions
        ]
    )
    num_host_atoms = len(host_conf)

    final_potentials = []
    host_potentials, host_masses = openmm_deserializer.deserialize_system(
        host_system, cutoff=1.2
    )
    host_nb_bp = None
    for bp in host_potentials:
        if isinstance(bp, potentials.Nonbonded):
            # (ytz): hack to ensure we only have one nonbonded term
            assert host_nb_bp is None
            host_nb_bp = bp
        else:
            final_potentials.append(bp)

    # TODO (ytz): we should really fix this later on. This padding was done to
    # address the particles that are too close to the boundary.
    padding = 0.1
    box_lengths = np.amax(host_conf, axis=0) - np.amin(host_conf, axis=0)
    box_lengths = box_lengths + padding
    box = np.eye(3, dtype=np.float64) * box_lengths

    # The forcefield file location does not depend on the guest.
    ff_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        "..",
        "ff/params/smirnoff_1_1_0_ccc.py",
    )

    suppl = Chem.SDMolSupplier(guests_sdfile, removeHs=False)
    for guest_mol in suppl:
        start_time = time.time()
        guest_name = guest_mol.GetProp("_Name")

        # Use a context manager so the file handle is closed deterministically.
        with open(ff_path) as ff_file:
            guest_ff_handlers = deserialize_handlers(ff_file.read())
        ff = Forcefield(guest_ff_handlers)
        guest_base_topology = topology.BaseTopology(guest_mol, ff)

        # combine
        hgt = topology.HostGuestTopology(host_nb_bp, guest_base_topology)
        # setup the parameter handlers for the ligand
        bonded_tuples = [
            [hgt.parameterize_harmonic_bond, ff.hb_handle],
            [hgt.parameterize_harmonic_angle, ff.ha_handle],
            [hgt.parameterize_proper_torsion, ff.pt_handle],
            [hgt.parameterize_improper_torsion, ff.it_handle],
        ]
        these_potentials = list(final_potentials)
        # instantiate the vjps while parameterizing (forward pass)
        for fn, handle in bonded_tuples:
            params, potential = fn(handle.params)
            these_potentials.append(potential.bind(params))
        nb_params, nb_potential = hgt.parameterize_nonbonded(
            ff.q_handle.params, ff.lj_handle.params
        )
        these_potentials.append(nb_potential.bind(nb_params))
        bps = these_potentials

        guest_masses = [a.GetMass() for a in guest_mol.GetAtoms()]
        masses = np.concatenate([host_masses, guest_masses])

        # Adding a large mass keeps the listed (1-indexed) atoms mostly fixed.
        for atom_num in constant_atoms:
            masses[atom_num - 1] += 50000

        conformer = guest_mol.GetConformer(0)
        mol_conf = np.array(conformer.GetPositions(), dtype=np.float64)
        mol_conf = mol_conf / 10  # convert to md_units

        if random_rotation:
            # Rotate about the guest's mean position.
            center = np.mean(mol_conf, axis=0)
            mol_conf -= center
            from scipy.stats import special_ortho_group

            mol_conf = np.matmul(mol_conf, special_ortho_group.rvs(3))
            mol_conf += center

        x0 = np.concatenate([host_conf, mol_conf])  # combined geometry
        v0 = np.zeros_like(x0)

        seed = 2021
        # Keep the integrator impl bound to a local for as long as the context lives.
        intg = LangevinIntegrator(300, 1.5e-3, 1.0, masses, seed).impl()

        precision = np.float32
        impls = [b.bound_impl(precision) for b in bps]

        ctxt = custom_ops.Context(x0, v0, box, intg, impls)

        # collect a du_dl calculation once every other step
        subsample_freq = 2
        du_dl_obs = custom_ops.FullPartialUPartialLambda(impls, subsample_freq)
        ctxt.add_observable(du_dl_obs)

        if transition_type == "insertion":
            new_lambda_schedule = np.concatenate(
                [
                    np.linspace(max_lambda, 0.0, transition_steps),
                    np.zeros(n_steps - transition_steps),
                ]
            )
        elif transition_type == "deletion":
            new_lambda_schedule = np.concatenate(
                [
                    np.linspace(0.0, max_lambda, transition_steps),
                    np.ones(n_steps - transition_steps) * max_lambda,
                ]
            )
        else:
            # Only reachable when the asserts above are disabled.
            raise RuntimeError(
                'invalid `transition_type` (must be one of ["insertion", "deletion"])'
            )

        # Forces are only inspected at the start, the midpoint and the end.
        force_check_steps = (0, int(n_steps / 2), n_steps - 1)

        calc_work = True
        for step, lamb in enumerate(new_lambda_schedule):
            ctxt.step(lamb)
            if step % 100 == 0:
                report.report_step(
                    ctxt, step, lamb, box, bps, impls, guest_name, n_steps, "pose_dock"
                )
                # Fetch the coordinates once and split host from guest;
                # the factor of 10 undoes the /10 applied to the guest above.
                x_t = ctxt.get_x_t()
                host_coords = x_t[:num_host_atoms] * 10
                guest_coords = x_t[num_host_atoms:] * 10
                report.write_frame(
                    host_coords,
                    host_mol,
                    guest_coords,
                    guest_mol,
                    guest_name,
                    outdir,
                    step,
                    "pd",
                )
            if step in force_check_steps:
                if report.too_much_force(ctxt, lamb, box, bps, impls):
                    calc_work = False
                    break

        # Read the observable once instead of once per use.
        full_du_dls = du_dl_obs.full_du_dl()

        # Note: this condition only applies for ABFE, not RBFE
        if abs(full_du_dls[0]) > 0.001 or abs(full_du_dls[-1]) > 0.001:
            print("Error: du_dl endpoints are not ~0")
            calc_work = False

        if calc_work:
            work = np.trapz(full_du_dls, new_lambda_schedule[::subsample_freq])
            print(f"guest_name: {guest_name}\twork: {work:.2f}")

        end_time = time.time()
        print(f"{guest_name} took {(end_time - start_time):.2f} seconds")
def predict(self, ff_params: list, mol_a: Chem.Mol, mol_b: Chem.Mol, core: np.ndarray):
    """
    Predict the ddG of morphing mol_a into mol_b. This function is differentiable w.r.t. ff_params.

    Parameters
    ----------

    ff_params: list of np.ndarray
        This should match the ordered params returned by the forcefield

    mol_a: Chem.Mol
        Starting molecule corresponding to lambda = 0

    mol_b: Chem.Mol
        Starting molecule corresponding to lambda = 1

    core: np.ndarray
        N x 2 list of ints corresponding to the atom mapping of the core.

    Returns
    -------
    float
        delta delta G in kJ/mol (complex stage dG minus solvent stage dG)
    aux
        list of (stage name, TI results) pairs, one per stage

    """
    stages = [
        ("complex", self.complex_system, self.complex_coords, self.complex_box, self.complex_schedule),
        ("solvent", self.solvent_system, self.solvent_coords, self.solvent_box, self.solvent_schedule),
    ]

    stage_dGs = []
    stage_results = []

    for stage, host_system, host_coords, host_box, lambda_schedule in stages:
        single_topology = topology.SingleTopology(mol_a, mol_b, core, self.ff)
        rfe = free_energy.RelativeFreeEnergy(single_topology)
        edge_hash = self._edge_hash(stage, mol_a, mol_b, core)

        use_cached_state = self.pre_equilibrate and edge_hash in self._equil_cache
        if use_cached_state:
            cached_state = self._equil_cache[edge_hash]
            x0 = cached_state.coords
            host_box = cached_state.box
            num_host_coords = len(host_coords)
            unbound_potentials, sys_params, masses, _ = rfe.prepare_host_edge(
                ff_params, host_system, host_coords
            )
            mol_a_size = mol_a.GetNumAtoms()
            # Use Dual Topology to pre equilibrate, so have to get the mean of the two sets of mol,
            # normally done within prepare_host_edge, but the whole system has moved by this stage
            host_part = x0[:num_host_coords]
            mol_a_part = x0[num_host_coords:num_host_coords + mol_a_size]
            mol_b_part = x0[num_host_coords + mol_a_size:]
            ligand_part = np.mean(
                single_topology.interpolate_params(mol_a_part, mol_b_part),
                axis=0,
            )
            x0 = np.concatenate([host_part, ligand_part])
        else:
            if self.pre_equilibrate:
                print("Edge not correctly pre-equilibrated, ensure equilibrate_edges was called")
            print(f"Minimizing the {stage} host structure to remove clashes.")
            # (ytz): this isn't strictly symmetric, and we should modify minimize later on remove
            # the hysteresis by jointly minimizing against a and b at the same time. We may also want
            # to remove the randomness completely from the minimization.
            min_host_coords = minimizer.minimize_host_4d(
                [mol_a, mol_b], host_system, host_coords, self.ff, host_box
            )
            unbound_potentials, sys_params, masses, coords = rfe.prepare_host_edge(
                ff_params, host_system, min_host_coords
            )
            x0 = coords

        v0 = np.zeros_like(x0)

        time_step = 1.5e-3

        # The first unbound potential is used as the harmonic bond term.
        bond_list = get_bond_list(unbound_potentials[0])
        if self.hmr:
            # With HMR enabled the masses are adjusted and a larger time step is used.
            masses = apply_hmr(masses, bond_list)
            time_step = 2.5e-3
        group_idxs = get_group_indices(bond_list)

        seed = 0
        temperature = 300.0
        pressure = 1.0

        integrator = LangevinIntegrator(temperature, time_step, 1.0, masses, seed)
        barostat = MonteCarloBarostat(
            x0.shape[0], pressure, temperature, group_idxs, self.barostat_interval, seed
        )

        model = estimator.FreeEnergyModel(
            unbound_potentials,
            self.client,
            host_box,
            x0,
            v0,
            integrator,
            lambda_schedule,
            self.equil_steps,
            self.prod_steps,
            barostat,
        )

        dG, results = estimator.deltaG(model, sys_params)

        stage_dGs.append(dG)
        stage_results.append((stage, results))

    complex_dG, solvent_dG = stage_dGs
    pred = complex_dG - solvent_dG

    return pred, stage_results
def test_free_energy_estimator_with_endpoint_correction():
    """
    Exercise the estimator code path with the endpoint correction turned on.

    The stated intent is that f([a,b,c,...]) yields derivatives df/da, df/db,
    df/dc, ... whose shapes match a, b, c, ... respectively. The body itself
    only evaluates the squared dG, once with no client and once with a
    CUDAPoolClient, and makes no explicit assertions.
    """
    n_atoms = 15
    x0 = np.random.rand(n_atoms, 3)
    v0 = np.zeros_like(x0)

    n_bonds = 3
    n_angles = 4
    n_restraints = 5
    hb_pot, hb_params = get_harmonic_bond(n_atoms, n_bonds)
    ha_pot, ha_params = get_harmonic_angle(n_atoms, n_angles)
    rs_pot, rs_params = get_harmonic_restraints(n_atoms, n_restraints)

    unbound_potentials = [hb_pot, ha_pot, rs_pot]
    sys_params = [hb_params, ha_params, rs_params]

    masses = np.random.rand(n_atoms)
    box = np.eye(3, dtype=np.float64)

    seed = 2021
    temperature = 300.0
    pressure = 1.0
    bond_groups = get_group_indices(get_bond_list(hb_pot))

    integrator = LangevinIntegrator(temperature, 1.5e-3, 1.0, masses, seed)
    barostat = MonteCarloBarostat(
        x0.shape[0], pressure, temperature, bond_groups, 25, seed
    )

    beta = 0.125
    lambda_schedule = np.linspace(0, 1.0, 4)

    def loss_fn(sys_params):
        # `client` is resolved at call time from the loop below.
        model = estimator_abfe.FreeEnergyModel(
            unbound_potentials,
            True,  # endpoint_correct
            client,
            box,
            x0,
            v0,
            integrator,
            barostat,
            lambda_schedule,
            100,
            100,
            beta,
            "test",
        )
        dG, _bar_dG_err, _results = estimator_abfe.deltaG(model, sys_params)
        return dG**2

    clients = [None, CUDAPoolClient(1)]
    for client in clients:
        loss_fn(sys_params)
def dock_and_equilibrate(
    host_pdbfile,
    guests_sdfile,
    max_lambda,
    insertion_steps,
    eq_steps,
    outdir,
    fewer_outfiles=False,
    constant_atoms=None,
):
    """Solvates a host, inserts guest(s) into solvated host, equilibrates

    Parameters
    ----------

    host_pdbfile: path to host pdb file to dock into
    guests_sdfile: path to input sdf with guests to pose/dock
    max_lambda: lambda value the guest should insert from or delete to
        (recommended: 1.0 for work calculation, 0.25 to stay close to original pose)
        (must be =1 for work calculation to be applicable)
    insertion_steps: how many steps to insert the guest over (recommended: 501)
    eq_steps: how many steps of equilibration to do after insertion (recommended: 15001)
    outdir: where to write output (will be created if it does not already exist)
    fewer_outfiles: if True, will only write frames for the equilibration, not insertion
    constant_atoms: atom numbers from the host_pdbfile to hold mostly fixed across the simulation
        (1-indexed, like PDB files). ``None`` (the default) means no atoms are held.

    Output
    ------

    A pdb & sdf file every 100 steps of insertion (outdir/<guest_name>/<guest_name>_<step>.[pdb/sdf])
    A pdb & sdf file every 1000 steps of equilibration (outdir/<guest_name>/<guest_name>_<step>.[pdb/sdf])
    stdout every 100(0) steps noting the step number, lambda value, and energy
    stdout for each guest noting the work of transition
    stdout for each guest noting how long it took to run

    Note
    ----
    If any norm of force per atom exceeds 20000 kJ/(mol*nm) [MAX_NORM_FORCE defined in docking/report.py],
    the simulation for that guest will stop and the work will not be calculated.
    """
    # Avoid a shared mutable default argument.
    if constant_atoms is None:
        constant_atoms = []

    if not os.path.exists(outdir):
        os.makedirs(outdir)

    print(f"""
    HOST_PDBFILE = {host_pdbfile}
    GUESTS_SDFILE = {guests_sdfile}
    OUTDIR = {outdir}
    MAX_LAMBDA = {max_lambda}
    INSERTION_STEPS = {insertion_steps}
    EQ_STEPS = {eq_steps}
    """)

    # Prepare host
    # TODO: handle extra (non-transitioning) guests?

    print("Solvating host...")
    # TODO: return topology from builders.build_protein_system
    (
        solvated_host_system,
        solvated_host_coords,
        _,
        _,
        host_box,
        solvated_topology,
    ) = builders.build_protein_system(host_pdbfile)

    # sometimes water boxes are sad. Should be minimized first; this is a workaround
    host_box += np.eye(3) * 0.1
    print("host box", host_box)

    # Round-trip the solvated host through a temporary PDB file to obtain an
    # RDKit molecule for it; the file is deleted once it has been read.
    solvated_host_pdb = os.path.join(outdir, "solvated_host.pdb")
    writer = pdb_writer.PDBWriter([solvated_topology], solvated_host_pdb)
    writer.write_frame(solvated_host_coords)
    writer.close()
    solvated_host_mol = Chem.MolFromPDBFile(solvated_host_pdb, removeHs=False)
    os.remove(solvated_host_pdb)

    num_host_atoms = len(solvated_host_coords)

    final_host_potentials = []
    host_potentials, host_masses = openmm_deserializer.deserialize_system(
        solvated_host_system, cutoff=1.2
    )
    host_nb_bp = None
    for bp in host_potentials:
        if isinstance(bp, potentials.Nonbonded):
            # (ytz): hack to ensure we only have one nonbonded term
            assert host_nb_bp is None
            host_nb_bp = bp
        else:
            final_host_potentials.append(bp)

    # The forcefield file location does not depend on the guest.
    ff_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        "..",
        "ff/params/smirnoff_1_1_0_ccc.py",
    )

    # Run the procedure
    print("Getting guests...")
    suppl = Chem.SDMolSupplier(guests_sdfile, removeHs=False)
    for guest_mol in suppl:
        start_time = time.time()
        guest_name = guest_mol.GetProp("_Name")
        guest_conformer = guest_mol.GetConformer(0)
        orig_guest_coords = np.array(guest_conformer.GetPositions(), dtype=np.float64)
        orig_guest_coords = orig_guest_coords / 10  # convert to md_units

        # Use a context manager so the file handle is closed deterministically.
        with open(ff_path) as ff_file:
            guest_ff_handlers = deserialize_handlers(ff_file.read())
        ff = Forcefield(guest_ff_handlers)
        guest_base_top = topology.BaseTopology(guest_mol, ff)

        # combine host & guest
        hgt = topology.HostGuestTopology(host_nb_bp, guest_base_top)
        # setup the parameter handlers for the ligand
        bonded_tuples = [
            [hgt.parameterize_harmonic_bond, ff.hb_handle],
            [hgt.parameterize_harmonic_angle, ff.ha_handle],
            [hgt.parameterize_proper_torsion, ff.pt_handle],
            [hgt.parameterize_improper_torsion, ff.it_handle],
        ]
        combined_bps = list(final_host_potentials)
        # instantiate the vjps while parameterizing (forward pass)
        for fn, handle in bonded_tuples:
            params, potential = fn(handle.params)
            combined_bps.append(potential.bind(params))
        nb_params, nb_potential = hgt.parameterize_nonbonded(
            ff.q_handle.params, ff.lj_handle.params
        )
        combined_bps.append(nb_potential.bind(nb_params))

        guest_masses = [a.GetMass() for a in guest_mol.GetAtoms()]
        combined_masses = np.concatenate([host_masses, guest_masses])

        x0 = np.concatenate([solvated_host_coords, orig_guest_coords])
        v0 = np.zeros_like(x0)
        print(
            "SYSTEM",
            f"guest_name: {guest_name}",
            f"num_atoms: {len(x0)}",
        )

        # Adding a large mass keeps the listed (1-indexed) atoms mostly fixed.
        for atom_num in constant_atoms:
            combined_masses[atom_num - 1] += 50000

        seed = 2021
        # Keep the integrator impl bound to a local for as long as the context lives.
        intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, combined_masses, seed).impl()

        u_impls = [bp.bound_impl(precision=np.float32) for bp in combined_bps]

        ctxt = custom_ops.Context(x0, v0, host_box, intg, u_impls)

        # collect a du_dl calculation once every other step
        subsample_freq = 2
        du_dl_obs = custom_ops.FullPartialUPartialLambda(u_impls, subsample_freq)
        ctxt.add_observable(du_dl_obs)

        # insert guest
        insertion_lambda_schedule = np.linspace(max_lambda, 0.0, insertion_steps)
        # Forces are only inspected at the start, the midpoint and the end.
        insertion_check_steps = (0, int(insertion_steps / 2), insertion_steps - 1)

        calc_work = True
        for step, lamb in enumerate(insertion_lambda_schedule):
            ctxt.step(lamb)
            if step % 100 == 0:
                report.report_step(
                    ctxt,
                    step,
                    lamb,
                    host_box,
                    combined_bps,
                    u_impls,
                    guest_name,
                    insertion_steps,
                    "INSERTION",
                )
                if not fewer_outfiles:
                    # Fetch the coordinates once and split host from guest;
                    # the factor of 10 undoes the /10 applied to the guest above.
                    x_t = ctxt.get_x_t()
                    host_coords = x_t[:num_host_atoms] * 10
                    guest_coords = x_t[num_host_atoms:] * 10
                    report.write_frame(
                        host_coords,
                        solvated_host_mol,
                        guest_coords,
                        guest_mol,
                        guest_name,
                        outdir,
                        str(step).zfill(len(str(insertion_steps))),
                        "ins",
                    )
            if step in insertion_check_steps:
                if report.too_much_force(ctxt, lamb, host_box, combined_bps, u_impls):
                    calc_work = False
                    break

        # Read the observable once instead of once per use.
        full_du_dls = du_dl_obs.full_du_dl()

        # Note: this condition only applies for ABFE, not RBFE
        if abs(full_du_dls[0]) > 0.001 or abs(full_du_dls[-1]) > 0.001:
            print("Error: du_dl endpoints are not ~0")
            calc_work = False

        if calc_work:
            work = np.trapz(full_du_dls, insertion_lambda_schedule[::subsample_freq])
            print(f"guest_name: {guest_name}\tinsertion_work: {work:.2f}")

        # equilibrate (runs even when the insertion loop stopped early)
        eq_check_steps = (0, int(eq_steps / 2), eq_steps - 1)
        for step in range(eq_steps):
            ctxt.step(0.00)
            if step % 1000 == 0:
                report.report_step(
                    ctxt,
                    step,
                    0.00,
                    host_box,
                    combined_bps,
                    u_impls,
                    guest_name,
                    eq_steps,
                    "EQUILIBRATION",
                )
                x_t = ctxt.get_x_t()
                host_coords = x_t[:num_host_atoms] * 10
                guest_coords = x_t[num_host_atoms:] * 10
                report.write_frame(
                    host_coords,
                    solvated_host_mol,
                    guest_coords,
                    guest_mol,
                    guest_name,
                    outdir,
                    str(step).zfill(len(str(eq_steps))),
                    "eq",
                )
            if step in eq_check_steps:
                if report.too_much_force(ctxt, 0.00, host_box, combined_bps, u_impls):
                    break

        end_time = time.time()
        print(f"{guest_name} took {(end_time - start_time):.2f} seconds")
def run_leg(
    orig_host_coords,
    orig_guest_coords,
    combined_bps,
    combined_masses,
    host_box,
    guest_name,
    leg_type,
    host_mol,
    guest_mol,
    outdir,
    num_deletions,
    deletion_steps,
    insertion_max_lambda,
    insertion_steps,
    eq1_steps,
    fewer_outfiles=False,
    no_outfiles=False,
):
    """Insert a guest, equilibrate, then launch ``num_deletions`` deletion runs.

    The stages are: insertion from ``insertion_max_lambda`` to MIN_LAMBDA over
    ``insertion_steps`` steps, a first equilibration of ``eq1_steps`` steps at
    MIN_LAMBDA, and then ``num_deletions`` rounds of 1001 further equilibration
    steps, each followed by a call to ``do_deletion`` from the current state.

    Frames are written after insertion and the first equilibration unless
    ``fewer_outfiles`` or ``no_outfiles`` is set, and after each later
    equilibration round unless ``no_outfiles`` is set.

    Returns
    -------
    list
        The values returned by ``do_deletion``, in order. The list is empty if
        ``report.too_much_force`` fires during insertion or the first
        equilibration, and truncated if it fires in a later round.
    """
    x0 = np.concatenate([orig_host_coords, orig_guest_coords])
    v0 = np.zeros_like(x0)
    print(
        f"{leg_type.upper()}_SYSTEM",
        f"guest_name: {guest_name}",
        f"num_atoms: {len(x0)}",
    )

    seed = 2021
    # Keep the integrator impl bound to a local for as long as the context lives.
    intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, combined_masses, seed).impl()

    u_impls = [bp.bound_impl(precision=np.float32) for bp in combined_bps]

    ctxt = custom_ops.Context(x0, v0, host_box, intg, u_impls)

    def _write_current_frame(step_label, suffix):
        # Split the current coordinates into host and guest and scale by 10
        # before handing them to the frame writer.
        host_coords = ctxt.get_x_t()[:len(orig_host_coords)] * 10
        guest_coords = ctxt.get_x_t()[len(orig_host_coords):] * 10
        report.write_frame(
            host_coords,
            host_mol,
            guest_coords,
            guest_mol,
            guest_name,
            outdir,
            step_label,
            suffix,
        )

    leg_label = leg_type.upper()

    # insert guest
    insertion_lambda_schedule = np.linspace(insertion_max_lambda, MIN_LAMBDA, insertion_steps)
    ctxt.multiple_steps(insertion_lambda_schedule, 0)  # do not collect du_dls

    lamb = insertion_lambda_schedule[-1]
    step = len(insertion_lambda_schedule) - 1

    report.report_step(
        ctxt,
        step,
        lamb,
        host_box,
        combined_bps,
        u_impls,
        guest_name,
        insertion_steps,
        f"{leg_label}_INSERTION",
    )
    if not (fewer_outfiles or no_outfiles):
        _write_current_frame(
            str(step).zfill(len(str(insertion_steps))),
            f"{leg_type}-ins",
        )
    if report.too_much_force(ctxt, lamb, host_box, combined_bps, u_impls):
        return []

    # equilibrate
    equil_lambda_schedule = np.ones(eq1_steps) * MIN_LAMBDA
    lamb = equil_lambda_schedule[-1]
    step = len(equil_lambda_schedule) - 1
    ctxt.multiple_steps(equil_lambda_schedule, 0)
    report.report_step(
        ctxt,
        step,
        MIN_LAMBDA,
        host_box,
        combined_bps,
        u_impls,
        guest_name,
        eq1_steps,
        f"{leg_label}_EQUILIBRATION_1",
    )
    if not (fewer_outfiles or no_outfiles):
        _write_current_frame(
            str(step).zfill(len(str(eq1_steps))),
            f"{leg_type}-eq1",
        )
    if report.too_much_force(ctxt, MIN_LAMBDA, host_box, combined_bps, u_impls):
        print("Too much force")
        return []

    # equilibrate more & shoot off deletion jobs
    steps_per_batch = 1001
    total_eq2_steps = num_deletions * steps_per_batch
    works = []
    for batch in range(num_deletions):
        deletion_lambda_schedule = np.ones(steps_per_batch) * MIN_LAMBDA

        ctxt.multiple_steps(deletion_lambda_schedule, 0)
        step = len(deletion_lambda_schedule) - 1
        cumulative_step = (batch + 1) * step
        report.report_step(
            ctxt,
            cumulative_step,
            MIN_LAMBDA,
            host_box,
            combined_bps,
            u_impls,
            guest_name,
            total_eq2_steps,
            f"{leg_label}_EQUILIBRATION_2",
        )

        # TODO: if guest has undocked, stop simulation
        if not no_outfiles:
            _write_current_frame(
                str(cumulative_step).zfill(len(str(total_eq2_steps))),
                f"{leg_type}-eq2",
            )
        if report.too_much_force(ctxt, MIN_LAMBDA, host_box, combined_bps, u_impls):
            print("Too much force")
            return works

        works.append(
            do_deletion(
                ctxt.get_x_t(),
                ctxt.get_v_t(),
                combined_bps,
                combined_masses,
                host_box,
                guest_name,
                leg_type,
                u_impls,
                deletion_steps,
            )
        )

    return works
def _futures_a_to_b(self, ff_params, mol_a, mol_b, combined_core_idxs, x0, box0, prefix, seed):
    """Set up the host-edge model for mol_a -> mol_b and launch one simulation per lambda window.

    A harmonic-bond restraint over ``combined_core_idxs`` is appended to the
    potentials prepared by ``rfe.prepare_host_edge``. One
    ``estimator_abfe.simulate`` job is created for each value in the model's
    lambda schedule, plus one extra job at lambda = 1.0 with the restraint
    potential stripped (the endpoint correction).

    Parameters
    ----------
    ff_params
        Forcefield parameters forwarded to ``rfe.prepare_host_edge``.
    mol_a, mol_b
        Molecules; only ``GetNumAtoms()`` is used here, apart from being
        passed to ``self.setup_topology``.
    combined_core_idxs
        Two-column integer array of core atom pairs, indexed into the full
        system ``x0``.
    x0
        Starting coordinates of the full system.
    box0
        Starting box (the equilibrated box).
    prefix
        Passed through to ``estimator_abfe.FreeEnergyModel``.
    seed
        Seed used for both the integrator and the barostat.

    Returns
    -------
    tuple
        ``(sys_params, model, futures)``. ``futures`` are ``_MockFuture``
        objects wrapping results computed inline when ``self.client`` is
        ``None``, otherwise whatever ``self.client.submit`` returns.
    """
    num_host_atoms = x0.shape[0] - mol_a.GetNumAtoms() - mol_b.GetNumAtoms()

    # (ytz): super ugly, undo combined_core_idxs to get back original idxs
    core_idxs = combined_core_idxs - num_host_atoms
    core_idxs[:, 1] -= mol_a.GetNumAtoms()

    dual_topology = self.setup_topology(mol_a, mol_b)
    rfe = free_energy_rabfe.RelativeFreeEnergy(dual_topology)

    unbound_potentials, sys_params, masses = rfe.prepare_host_edge(ff_params, self.host_system)

    # Restraint parameters: column 0 holds k_core, the other column stays zero.
    k_core = 30.0
    core_params = np.zeros_like(combined_core_idxs).astype(np.float64)
    core_params[:, 0] = k_core

    restraint_potential = potentials.HarmonicBond(combined_core_idxs)
    unbound_potentials.append(restraint_potential)
    sys_params.append(core_params)

    # tbd sample from boltzmann distribution later
    v0 = np.zeros_like(x0)

    beta = 1 / (constants.BOLTZ * self.temperature)

    bond_list = np.concatenate([unbound_potentials[0].get_idxs(), core_idxs])
    masses = model_utils.apply_hmr(masses, bond_list)
    friction = 1.0
    integrator = LangevinIntegrator(self.temperature, self.dt, friction, masses, seed)
    bond_list = list(map(tuple, bond_list))
    group_indices = get_group_indices(bond_list)
    barostat_interval = 5

    barostat = MonteCarloBarostat(
        x0.shape[0], self.pressure, self.temperature, group_indices, barostat_interval, seed
    )

    endpoint_correct = True
    model = estimator_abfe.FreeEnergyModel(
        unbound_potentials,
        endpoint_correct,
        self.client,
        box0,  # important, use equilibrated box.
        x0,
        v0,
        integrator,
        barostat,
        self.host_schedule,
        self.equil_steps,
        self.prod_steps,
        beta,
        prefix,
    )

    bound_potentials = [
        unbound_pot.bind(np.asarray(params))
        for params, unbound_pot in zip(sys_params, model.unbound_potentials)
    ]

    # Defined before the loop so the endpoint-correction job below can use it
    # even when the lambda schedule is empty (previously a NameError).
    subsample_interval = 1000

    all_args = [
        (
            lamb,
            model.box,
            model.x0,
            model.v0,
            bound_potentials,
            model.integrator,
            model.barostat,
            model.equil_steps,
            model.prod_steps,
            subsample_interval,
            subsample_interval,
            model.lambda_schedule,
        )
        for lamb in model.lambda_schedule
    ]

    if endpoint_correct:
        # The restraint was appended last above, so it must be the final potential.
        assert isinstance(bound_potentials[-1], potentials.HarmonicBond)

        all_args.append(
            (
                1.0,
                model.box,
                model.x0,
                model.v0,
                bound_potentials[:-1],  # strip out the restraints
                model.integrator,
                model.barostat,
                model.equil_steps,
                model.prod_steps,
                subsample_interval,
                subsample_interval,
                [],  # no need to evaluate Us for the endpoint correction
            )
        )

    if self.client is None:
        # No client: run each simulation inline and wrap the result.
        futures = [_MockFuture(estimator_abfe.simulate(*args)) for args in all_args]
    else:
        futures = [self.client.submit(estimator_abfe.simulate, *args) for args in all_args]

    return sys_params, model, futures
def minimize_host_4d(romol, host_system, host_coords, ff, box):
    """
    Insert romol into a host system via 4D decoupling under a Langevin thermostat. The ligand
    coordinates are fixed during this, and only host_coordinates are minimized.

    Parameters
    ----------
    romol: ROMol
        Ligand to be inserted. It must be embedded.

    host_system: openmm.System
        OpenMM System representing the host

    host_coords: np.ndarray
        N x 3 coordinates of the host. units of nanometers.

    ff: ff.Forcefield
        Wrapper class around a list of handlers

    box: np.ndarray [3,3]
        Box matrix for periodic boundary conditions. units of nanometers.

    Returns
    -------
    np.ndarray
        This returns minimized host_coords.

    Raises
    ------
    ValueError
        If the deserialized host system contains no Nonbonded potential.

    """

    host_bps, host_masses = openmm_deserializer.deserialize_system(host_system, cutoff=1.2)

    # keep the ligand rigid: its atoms are made very heavy
    ligand_masses = [a.GetMass() * 100000 for a in romol.GetAtoms()]
    combined_masses = np.concatenate([host_masses, ligand_masses])
    ligand_coords = get_romol_conf(romol)
    combined_coords = np.concatenate([host_coords, ligand_coords])
    num_host_atoms = host_coords.shape[0]

    # The host nonbonded term is handed to the host-guest topology; every
    # other host potential is used as-is.
    host_p = None
    final_potentials = []
    for bp in host_bps:
        if isinstance(bp, potentials.Nonbonded):
            host_p = bp
        else:
            final_potentials.append(bp)

    if host_p is None:
        # Previously this surfaced as an UnboundLocalError a few lines below.
        raise ValueError("host_system does not contain a Nonbonded potential")

    gbt = topology.BaseTopology(romol, ff)
    hgt = topology.HostGuestTopology(host_p, gbt)

    # setup the parameter handlers for the ligand
    tuples = [
        [hgt.parameterize_harmonic_bond, [ff.hb_handle]],
        [hgt.parameterize_harmonic_angle, [ff.ha_handle]],
        [hgt.parameterize_proper_torsion, [ff.pt_handle]],
        [hgt.parameterize_improper_torsion, [ff.it_handle]],
        [hgt.parameterize_nonbonded, [ff.q_handle, ff.lj_handle]],
    ]

    for fn, handles in tuples:
        params, potential = fn(*[h.params for h in handles])
        final_potentials.append(potential.bind(params))

    seed = 2020

    intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, combined_masses, seed).impl()

    x0 = combined_coords
    v0 = np.zeros_like(x0)

    u_impls = [bp.bound_impl(precision=np.float32) for bp in final_potentials]

    # context components: positions, velocities, box, integrator, energy fxns
    ctxt = custom_ops.Context(x0, v0, box, intg, u_impls)

    # one step per lambda value, walking lambda from 1.0 down to 0
    for lamb in np.linspace(1.0, 0, 1000):
        ctxt.step(lamb)

    return ctxt.get_x_t()[:num_host_atoms]
def minimize_host_4d(mols, host_system, host_coords, ff, box, mol_coords=None) -> np.ndarray:
    """
    Relax a host around one or two rigid ligands using 4D decoupling.

    The procedure is: Fire minimization with a schedule of ones, a 0 Kelvin
    Langevin run over 1000 lambda values from 1.0 to 0.0, then a second Fire
    minimization with a schedule of zeros. Ligand atoms are given very large
    masses so that effectively only host_coords move.

    Parameters
    ----------
    mols: list of Chem.Mol
        Ligands to be inserted. This must be of length 1 or 2 for now.

    host_system: openmm.System
        OpenMM System representing the host

    host_coords: np.ndarray
        N x 3 coordinates of the host. units of nanometers.

    ff: ff.Forcefield
        Wrapper class around a list of handlers

    box: np.ndarray [3,3]
        Box matrix for periodic boundary conditions. units of nanometers.

    mol_coords: list of np.ndarray
        Optional ligand coordinates, one array per mol. When omitted the
        coordinates come from get_romol_conf(mol).

    Returns
    -------
    np.ndarray
        The minimized host coordinates (ligand rows are dropped).

    Raises
    ------
    ValueError
        If mols does not have length 1 or 2.

    """
    assert box.shape == (3, 3)

    host_bps, host_masses = openmm_deserializer.deserialize_system(host_system, cutoff=1.2)
    num_host_atoms = host_coords.shape[0]

    num_mols = len(mols)
    if num_mols == 1:
        top = topology.BaseTopology(mols[0], ff)
    elif num_mols == 2:
        top = topology.DualTopologyMinimization(mols[0], mols[1], ff)
    else:
        raise ValueError("mols must be length 1 or 2")

    # mass increase is to keep the ligand fixed
    heavy_ligand_masses = [
        np.array([atom.GetMass() * 100000 for atom in mol.GetAtoms()]) for mol in mols
    ]

    if mol_coords is None:
        ligand_confs = [get_romol_conf(mol) for mol in mols]
    else:
        ligand_confs = list(mol_coords)

    combined_masses = np.concatenate([np.array(host_masses), *heavy_ligand_masses])
    combined_coords = np.concatenate([np.array(host_coords), *ligand_confs])

    hgt = topology.HostGuestTopology(host_bps, top)
    u_impls = bind_potentials(hgt, ff)

    # the seed is irrelevant: at 0 Kelvin the integrator adds no noise
    intg = LangevinIntegrator(0.0, 1.5e-3, 1.0, combined_masses, 0).impl()

    v0 = np.zeros_like(combined_coords)
    x0 = fire_minimize(combined_coords, u_impls, box, np.ones(50))

    # context components: positions, velocities, box, integrator, energy fxns
    ctxt = custom_ops.Context(x0, v0, box, intg, u_impls)
    ctxt.multiple_steps(np.linspace(1.0, 0, 1000))

    final_coords = fire_minimize(ctxt.get_x_t(), u_impls, box, np.zeros(50))

    # sanity check: no per-atom force norm may reach 25000 for any potential
    for u_impl in u_impls:
        du_dx, _, _ = u_impl.execute(final_coords, box, 0.0)
        assert np.all(np.linalg.norm(du_dx, axis=-1) < 25000)

    return final_coords[:num_host_atoms]
def equilibrate_host(
    mol: Chem.Mol,
    host_system: openmm.System,
    host_coords: NDArray,
    temperature: float,
    pressure: float,
    ff: Forcefield,
    box: NDArray,
    n_steps: int,
    seed: Optional[int] = None,
) -> Tuple[NDArray, NDArray]:
    """
    Equilibrate a host around a reference molecule under a MonteCarloBarostat.

    Handy when one host is shared by many FEP calculations built on the same
    reference molecule (e.g. a starmap).

    Steps performed:
      - minimize the host with the molecule held rigid
      - minimize host and molecule together
      - run n_steps at lambda=0 with HMR masses, applying the barostat every 5 steps

    Parameters
    ----------
    mol: Chem.Mol
        Ligand for the host to equilibrate with.

    host_system: openmm.System
        OpenMM System representing the host.

    host_coords: np.ndarray
        N x 3 coordinates of the host. units of nanometers.

    temperature: float
        Temperature at which to run the simulation. Units of kelvins.

    pressure: float
        Pressure at which to run the simulation. Units of bars.

    ff: ff.Forcefield
        Wrapper class around a list of handlers.

    box: np.ndarray [3,3]
        Box matrix for periodic boundary conditions. units of nanometers.

    n_steps: int
        Number of steps to run the simulation for.

    seed: int or None
        Seed for integrator and barostat; drawn at random when None.

    Returns
    -------
    tuple (coords, box)
        Coordinates of the equilibrated host + ligand system and the final box.

    """
    # insert mol into the binding pocket.
    host_bps, host_masses = openmm_deserializer.deserialize_system(host_system, cutoff=1.2)
    min_host_coords = minimize_host_4d([mol], host_system, host_coords, ff, box)

    ligand_masses = [atom.GetMass() for atom in mol.GetAtoms()]
    ligand_coords = get_romol_conf(mol)

    all_masses = np.concatenate([host_masses, ligand_masses])
    x0 = np.concatenate([min_host_coords, ligand_coords])

    hgt = topology.HostGuestTopology(host_bps, topology.BaseTopology(mol, ff))

    # (parameterize function, forcefield handles feeding it); the harmonic
    # bond entry must stay first because the bond list is read from it below.
    parameterizers = (
        (hgt.parameterize_harmonic_bond, (ff.hb_handle,)),
        (hgt.parameterize_harmonic_angle, (ff.ha_handle,)),
        (hgt.parameterize_periodic_torsion, (ff.pt_handle, ff.it_handle)),
        (hgt.parameterize_nonbonded, (ff.q_handle, ff.lj_handle)),
    )

    bound_potentials = []
    u_impls = []
    for parameterize, handles in parameterizers:
        params, potential = parameterize(*[handle.params for handle in handles])
        bound = potential.bind(params)
        bound_potentials.append(bound)
        u_impls.append(bound.bound_impl(precision=np.float32))

    bond_list = get_bond_list(bound_potentials[0])
    all_masses = model_utils.apply_hmr(all_masses, bond_list)

    if seed is None:
        seed = np.random.randint(np.iinfo(np.int32).max)

    dt = 2.5e-3
    friction = 1.0
    integrator = LangevinIntegrator(temperature, dt, friction, all_masses, seed).impl()

    v0 = np.zeros_like(x0)

    barostat_interval = 5
    barostat = MonteCarloBarostat(
        x0.shape[0],
        pressure,
        temperature,
        get_group_indices(bond_list),
        barostat_interval,
        seed,
    ).impl(u_impls)

    # Re-minimize with the mol being flexible
    x0 = fire_minimize(x0, u_impls, box, np.ones(50))

    # context components: positions, velocities, box, integrator, energy fxns
    ctxt = custom_ops.Context(x0, v0, box, integrator, u_impls, barostat)

    # n_steps steps, all at lambda = 0
    ctxt.multiple_steps(np.zeros(n_steps))

    return ctxt.get_x_t(), ctxt.get_box()
def dock_and_equilibrate(
    host_pdbfile,
    guests_sdfile,
    max_lambda,
    insertion_steps,
    eq_steps,
    outdir,
    fewer_outfiles=False,
    constant_atoms=[],
):
    """Solvates a host, inserts guest(s) into solvated host, equilibrates

    Parameters
    ----------

    host_pdbfile: path to host pdb file to dock into
    guests_sdfile: path to input sdf with guests to pose/dock
    max_lambda: lambda value the guest should insert from or delete to
        (recommended: 1.0 for work calculation, 0.25 to stay close to original pose)
        (must be =1 for work calculation to be applicable)
    insertion_steps: how many steps to insert the guest over (recommended: 501)
    eq_steps: how many steps of equilibration to do after insertion (recommended: 15001)
    outdir: where to write output (will be created if it does not already exist)
    fewer_outfiles: if True, will only write frames for the equilibration, not insertion
    constant_atoms: atom numbers from the host_pdbfile to hold mostly fixed across the simulation
        (1-indexed, like PDB files). The default list is only read, never mutated.

    Output
    ------

    A pdb & sdf file for the last step of insertion
       (outdir/<guest_name>/<guest_name>_ins_<step>_[host.pdb/guest.sdf])
    A pdb & sdf file every 1000 steps of equilibration
       (outdir/<guest_name>/<guest_name>_eq_<step>_[host.pdb/guest.sdf])
    stdout corresponding to the files written noting the lambda value and energy
    stdout for each guest noting the work of transition, if applicable
    stdout for each guest noting how long it took to run

    Note
    ----
    The work will not be calculated if the du_dl endpoints are not close to 0 or if any norm of
    force per atom exceeds 20000 kJ/(mol*nm) [MAX_NORM_FORCE defined in docking/report.py]
    """

    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(outdir, exist_ok=True)

    print(f"""
    HOST_PDBFILE = {host_pdbfile}
    GUESTS_SDFILE = {guests_sdfile}
    OUTDIR = {outdir}
    MAX_LAMBDA = {max_lambda}
    INSERTION_STEPS = {insertion_steps}
    EQ_STEPS = {eq_steps}
    """)

    # Prepare host
    # TODO: handle extra (non-transitioning) guests?

    print("Solvating host...")
    (
        solvated_host_system,
        solvated_host_coords,
        _,
        _,
        host_box,
        solvated_topology,
    ) = builders.build_protein_system(host_pdbfile)

    # Round-trip the solvated host through a temporary PDB file to obtain an
    # RDKit mol. mkstemp returns an OPEN descriptor that we must close; the
    # writer is handed the path, not the descriptor.
    tmp_fd, solvated_host_pdb = tempfile.mkstemp(suffix=".pdb", text=True)
    os.close(tmp_fd)
    try:
        writer = pdb_writer.PDBWriter([solvated_topology], solvated_host_pdb)
        writer.write_frame(solvated_host_coords)
        writer.close()
        solvated_host_mol = Chem.MolFromPDBFile(solvated_host_pdb, removeHs=False)
    finally:
        # remove the temp file even if writing or parsing failed
        os.remove(solvated_host_pdb)

    ff = Forcefield.load_from_file("smirnoff_1_1_0_ccc.py")

    num_host_atoms = len(solvated_host_coords)

    def _write_frame(ctxt, guest_mol, guest_name, step, total_steps, tag):
        # Split the context coordinates into host / guest parts and write them
        # out; the factor 10 undoes the "/ 10" md-unit conversion used on input.
        host_coords = ctxt.get_x_t()[:num_host_atoms] * 10
        guest_coords = ctxt.get_x_t()[num_host_atoms:] * 10
        report.write_frame(
            host_coords,
            solvated_host_mol,
            guest_coords,
            guest_mol,
            guest_name,
            outdir,
            str(step).zfill(len(str(total_steps))),
            tag,
        )

    # Run the procedure
    print("Getting guests...")
    suppl = Chem.SDMolSupplier(guests_sdfile, removeHs=False)
    for guest_mol in suppl:
        start_time = time.time()
        guest_name = guest_mol.GetProp("_Name")
        guest_conformer = guest_mol.GetConformer(0)
        orig_guest_coords = np.array(guest_conformer.GetPositions(), dtype=np.float64)
        orig_guest_coords = orig_guest_coords / 10  # convert to md_units

        minimized_coords = minimizer.minimize_host_4d(
            [guest_mol], solvated_host_system, solvated_host_coords, ff, host_box
        )

        afe = free_energy.AbsoluteFreeEnergy(guest_mol, ff)

        ups, sys_params, combined_masses, _ = afe.prepare_host_edge(
            ff.get_ordered_params(), solvated_host_system, minimized_coords
        )

        combined_bps = [up.bind(sp) for up, sp in zip(ups, sys_params)]

        x0 = np.concatenate([minimized_coords, orig_guest_coords])
        v0 = np.zeros_like(x0)
        print("SYSTEM", f"guest_name: {guest_name}", f"num_atoms: {len(x0)}")

        # extra mass keeps the requested host atoms mostly fixed (1-indexed input)
        for atom_num in constant_atoms:
            combined_masses[atom_num - 1] += 50000

        seed = 2021
        intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, combined_masses, seed).impl()

        u_impls = [bp.bound_impl(precision=np.float32) for bp in combined_bps]

        ctxt = custom_ops.Context(x0, v0, host_box, intg, u_impls)

        # insert guest
        insertion_lambda_schedule = np.linspace(max_lambda, 0.0, insertion_steps)

        # collect a du_dl calculation on every step
        subsample_interval = 1

        full_du_dls, _, _ = ctxt.multiple_steps(insertion_lambda_schedule, subsample_interval)
        step = len(insertion_lambda_schedule) - 1
        lamb = insertion_lambda_schedule[-1]
        ctxt.step(lamb)

        report.report_step(
            ctxt,
            step,
            lamb,
            host_box,
            combined_bps,
            u_impls,
            guest_name,
            insertion_steps,
            "INSERTION",
        )
        if not fewer_outfiles:
            _write_frame(ctxt, guest_mol, guest_name, step, insertion_steps, "ins")

        if report.too_much_force(ctxt, lamb, host_box, combined_bps, u_impls):
            # give up on this guest entirely: no work, no equilibration, no timing line
            print("Not calculating work (too much force)")
            continue

        # Note: this condition only applies for ABFE, not RBFE
        if abs(full_du_dls[0]) > 0.001 or abs(full_du_dls[-1]) > 0.001:
            print("Not calculating work (du_dl endpoints are not ~0)")
        else:
            work = np.trapz(full_du_dls, insertion_lambda_schedule[::subsample_interval])
            print(f"guest_name: {guest_name}\tinsertion_work: {work:.2f}")

        # equilibrate
        for step in range(eq_steps):
            ctxt.step(0.00)
            if step % 1000 == 0:
                report.report_step(
                    ctxt,
                    step,
                    0.00,
                    host_box,
                    combined_bps,
                    u_impls,
                    guest_name,
                    eq_steps,
                    "EQUILIBRATION",
                )
                if (not fewer_outfiles) or (step == eq_steps - 1):
                    _write_frame(ctxt, guest_mol, guest_name, step, eq_steps, "eq")
            # force sanity check at the start, middle and end of equilibration
            if step in (0, int(eq_steps / 2), eq_steps - 1):
                if report.too_much_force(ctxt, 0.00, host_box, combined_bps, u_impls):
                    break

        end_time = time.time()
        print(f"{guest_name} took {(end_time - start_time):.2f} seconds")
def equilibrate_edges(
    self,
    edges: List[Tuple[Chem.Mol, Chem.Mol, np.ndarray]],
    lamb: float = 0.0,
    barostat_interval: int = 10,
    equilibration_steps: int = 100000,
    cache_path: str = "equilibration_cache.pkl",
):
    """
    Pre equilibrate edges and cache them for later use in predictions.

    Every edge is equilibrated in both the complex and the solvent system.
    Jobs are submitted through ``self.client`` and the results are stored in
    ``self._equil_cache`` keyed by ``self._edge_hash(stage, mol_a, mol_b, core)``.
    Does nothing when ``self.pre_equilibrate`` is falsy.

    edges: List of tuples with mol_a, mol_b, core
        Edges to equilibrate

    lamb: float
        Lambda value to equilibrate at. Uses Dual Topology to equilibrate

    barostat_interval: int
        Interval on which to run barostat during equilibration

    equilibration_steps: int
        Number of steps to equilibrate the edge for

    cache_path: string
        Path to look for existing cache or path to where to save cache. By default
        it will write out a pickle file in the local directory. A falsy value
        (None or "") disables both loading and saving.
    """
    if not self.pre_equilibrate:
        return

    # The save step at the bottom already tolerates a falsy cache_path; guard
    # the lookup as well, since os.path.isfile(None) raises TypeError.
    if cache_path and os.path.isfile(cache_path):
        # NOTE(review): `load` is presumably pickle.load (the default path is
        # a .pkl file) -- only point cache_path at files you trust.
        with open(cache_path, "rb") as ifs:
            self._equil_cache = load(ifs)
        print("Loaded Pre-equilibrated structures from cache")
        return

    futures = []
    ordered_params = self.ff.get_ordered_params()

    temperature = 300.0
    pressure = 1.0

    # Run all complex legs first, then solvent, as the complex legs will
    # likely take longer than the solvent legs
    stages = [
        ("complex", self.complex_system, self.complex_coords, self.complex_box),
        ("solvent", self.solvent_system, self.solvent_coords, self.solvent_box),
    ]
    for stage, host_system, host_coords, host_box in stages:
        for mol_a, mol_b, core in edges:
            # Use DualTopology to ensure mols exist in the same space.
            topo = topology.DualTopologyMinimization(mol_a, mol_b, self.ff)
            rfe = free_energy.RelativeFreeEnergy(topo)
            min_coords = minimizer.minimize_host_4d(
                [mol_a, mol_b], host_system, host_coords, self.ff, host_box
            )

            unbound_potentials, sys_params, masses, coords = rfe.prepare_host_edge(
                ordered_params, host_system, min_coords
            )

            # the first potential is the harmonic bond term; its bond table
            # defines the molecule groups used by the barostat
            harmonic_bond_potential = unbound_potentials[0]
            bond_list = get_bond_list(harmonic_bond_potential)
            group_idxs = get_group_indices(bond_list)

            # HMR allows the larger time step
            time_step = 1.5e-3
            if self.hmr:
                masses = apply_hmr(masses, bond_list)
                time_step = 2.5e-3

            # integrator and barostat both use a fixed seed of 0
            integrator = LangevinIntegrator(temperature, time_step, 1.0, masses, 0)
            barostat = MonteCarloBarostat(
                coords.shape[0], pressure, temperature, group_idxs, barostat_interval, 0
            )

            pots = [bp.bind(np.asarray(params)) for bp, params in zip(unbound_potentials, sys_params)]

            future = self.client.submit(
                estimator.equilibrate,
                integrator,
                barostat,
                pots,
                coords,
                host_box,
                lamb,
                equilibration_steps,
            )
            futures.append((stage, (mol_a, mol_b, core), future))

    # Collect results in submission order; result() blocks until the job is done.
    num_equil = len(futures)
    for i, (stage, edge, future) in enumerate(futures):
        edge_hash = self._edge_hash(stage, *edge)
        self._equil_cache[edge_hash] = future.result()
        if (i + 1) % 5 == 0:
            print(f"Pre-equilibrated {i+1} of {num_equil} edges")
    print(f"Pre-equilibrated {num_equil} edges")

    if cache_path:
        with open(cache_path, "wb") as ofs:
            dump(self._equil_cache, ofs)
        print(f"Saved equilibration_cache to {cache_path}")
def benchmark_dhfr():
    """
    Benchmark plain MD stepping on the solvated DHFR test system.

    Loads 'tests/data/5dfr_solv_equil.pdb', builds the system with the
    amber99sbildn / tip3p force fields, runs 50000 Langevin steps at
    lambda = 0 and prints the running throughput (ns/day) every 1000 steps.
    Afterwards the averaged du/dp observable of every potential is printed.
    """
    pdb_path = 'tests/data/5dfr_solv_equil.pdb'
    host_pdb = app.PDBFile(pdb_path)
    protein_ff = app.ForceField('amber99sbildn.xml', 'tip3p.xml')

    host_system = protein_ff.createSystem(
        host_pdb.topology,
        nonbondedMethod=app.NoCutoff,
        constraints=None,
        rigidWater=False
    )

    host_coords = host_pdb.positions
    box = host_pdb.topology.getPeriodicBoxVectors()
    box = np.asarray(box/box.unit)  # strip the unit, keep the raw matrix

    host_fns, host_masses = openmm_deserializer.deserialize_system(
        host_system,
        cutoff=1.0
    )

    host_conf = np.array(
        [[to_md_units(x), to_md_units(y), to_md_units(z)] for x, y, z in host_coords]
    )

    seed = 1234
    dt = 1.5e-3

    intg = LangevinIntegrator(
        300,
        dt,
        1.0,
        np.array(host_masses),
        seed
    ).impl()

    # get the bound implementations
    bps = [potential.bound_impl(precision=np.float32) for potential in host_fns]

    x0 = host_conf
    v0 = np.zeros_like(host_conf)

    ctxt = custom_ops.Context(
        x0,
        v0,
        box,
        intg,
        bps
    )

    # initialize observables: one du/dp average per potential
    obs = []
    for bp in bps:
        du_dp_obs = custom_ops.AvgPartialUPartialParam(bp, 100)
        ctxt.add_observable(du_dp_obs)
        obs.append(du_dp_obs)

    lamb = 0.0
    num_steps = 50000
    seconds_per_day = 86400

    start = time.time()

    # NOTE: frame writing is currently disabled (see the commented lines in
    # the loop), so the writer is only opened and closed.
    writer = PDBWriter([host_pdb.topology], "dhfr.pdb")
    try:
        for step in range(num_steps):
            ctxt.step(lamb)
            if step % 1000 == 0:
                delta = time.time()-start
                # step + 1 steps have been completed at this point; using
                # `step` under-counts and reports 0 ns/day on the first print
                steps_per_second = (step + 1)/delta
                steps_per_day = steps_per_second*seconds_per_day
                ps_per_day = dt*steps_per_day
                ns_per_day = ps_per_day*1e-3
                print(step, "ns/day", ns_per_day)
                # coords = recenter(ctxt.get_x_t(), box)
                # writer.write_frame(coords*10)

        print("total time", time.time() - start)
    finally:
        # release the output file even if a step fails
        writer.close()

    # bond angle torsions nonbonded
    for potential, du_dp_obs in zip(host_fns, obs):
        dp = du_dp_obs.avg_du_dp()
        print(potential, dp.shape)
        print(dp)
def run_leg(
    combined_coords,
    combined_bps,
    combined_masses,
    host_box,
    guest_name,
    leg_type,
    num_switches,
    transition_steps,
):
    """
    Alternate equilibration batches with switching jobs for one leg.

    For each of ``num_switches`` rounds the context is advanced by 1001 steps
    at MIN_LAMBDA, the state is reported, and ``do_switch`` is started from
    the current positions and velocities.

    Returns
    -------
    list or None
        The value returned by ``do_switch`` for every round, or None as soon
        as ``report.too_much_force`` flags the system.
    """
    x0 = combined_coords
    v0 = np.zeros_like(x0)
    stage_prefix = leg_type.upper()

    print(
        f"{stage_prefix}_SYSTEM",
        f"guest_name: {guest_name}",
        f"num_atoms: {len(x0)}",
    )

    seed = 2021
    intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, combined_masses, seed).impl()
    u_impls = [bp.bound_impl(precision=np.float32) for bp in combined_bps]
    ctxt = custom_ops.Context(x0, v0, host_box, intg, u_impls)

    # TODO: pre-equilibrate?

    # equilibrate & shoot off switching jobs
    steps_per_batch = 1001
    last_step = steps_per_batch - 1
    total_steps = num_switches * steps_per_batch

    works = []
    for batch_idx in range(num_switches):
        # constant schedule: every step of the batch runs at MIN_LAMBDA
        equil2_lambda_schedule = np.ones(steps_per_batch) * MIN_LAMBDA
        ctxt.multiple_steps(equil2_lambda_schedule, 0)

        report.report_step(
            ctxt,
            (batch_idx + 1) * last_step,
            equil2_lambda_schedule[-1],
            host_box,
            combined_bps,
            u_impls,
            guest_name,
            total_steps,
            f"{stage_prefix}_EQUILIBRATION_2",
        )

        if report.too_much_force(ctxt, MIN_LAMBDA, host_box, combined_bps, u_impls):
            return

        works.append(
            do_switch(
                ctxt.get_x_t(),
                ctxt.get_v_t(),
                combined_bps,
                combined_masses,
                host_box,
                guest_name,
                leg_type,
                u_impls,
                transition_steps,
            )
        )

    return works
def simulate_futures(
    self, ff_params, mol, x0, box0, prefix, core_idxs=None, seed=0
) -> Tuple[List[Any], estimator_abfe.FreeEnergyModel, List[Any]]:
    """
    Build the free energy model for a single ligand and launch its simulations.

    One ``estimator_abfe.simulate`` job is queued per entry of
    ``self.host_schedule``. Endpoint correction is disabled here.

    Parameters
    ----------
    ff_params:
        Forcefield parameters, forwarded to ``afe.prepare_host_edge``.
    mol:
        The ligand.
    x0: np.ndarray
        Starting coordinates of the combined system.
    box0: np.ndarray
        Starting box.
    prefix:
        Forwarded unchanged to ``estimator_abfe.FreeEnergyModel``.
    core_idxs:
        Accepted but not used by this method.
    seed: int
        Seed for integrator and barostat; 0 means "draw a random seed".

    Returns
    -------
    tuple (sys_params, model, futures)
        ``futures`` holds one entry per job; when ``self.client`` is None the
        jobs are run eagerly and wrapped in ``_MockFuture``.
    """
    ligand_topology = self.setup_topology(mol)
    afe = free_energy_rabfe.AbsoluteFreeEnergy(mol, ligand_topology)

    unbound_potentials, sys_params, masses = afe.prepare_host_edge(ff_params, self.host_system)

    if seed == 0:
        seed = np.random.randint(np.iinfo(np.int32).max)

    beta = 1 / (constants.BOLTZ * self.temperature)

    # the first potential is the harmonic bond term; its bond table drives
    # both HMR and the barostat's molecule groups
    bond_list = get_bond_list(unbound_potentials[0])
    masses = model_utils.apply_hmr(masses, bond_list)

    integrator = LangevinIntegrator(self.temperature, self.dt, 1.0, masses, seed)
    barostat = MonteCarloBarostat(
        x0.shape[0],
        self.pressure,
        self.temperature,
        get_group_indices(bond_list),
        5,  # barostat interval
        seed,
    )

    v0 = np.zeros_like(x0)

    endpoint_correct = False
    model = estimator_abfe.FreeEnergyModel(
        unbound_potentials,
        endpoint_correct,
        self.client,
        box0,
        x0,
        v0,
        integrator,
        barostat,
        self.host_schedule,
        self.equil_steps,
        self.prod_steps,
        beta,
        prefix,
    )

    bound_potentials = [
        unbound_pot.bind(np.asarray(params))
        for params, unbound_pot in zip(sys_params, model.unbound_potentials)
    ]

    subsample_interval = 1000

    def _job_args(lamb, job_potentials, eval_schedule):
        # Positional argument tuple for estimator_abfe.simulate.
        return (
            lamb,
            model.box,
            model.x0,
            model.v0,
            job_potentials,
            model.integrator,
            model.barostat,
            model.equil_steps,
            model.prod_steps,
            subsample_interval,
            subsample_interval,
            eval_schedule,
        )

    all_args = [_job_args(lamb, bound_potentials, model.lambda_schedule) for lamb in model.lambda_schedule]

    # Inactive while endpoint_correct is False; kept so the flag can be flipped.
    if endpoint_correct:
        assert isinstance(bound_potentials[-1], potentials.HarmonicBond)
        all_args.append(
            _job_args(
                1.0,
                bound_potentials[:-1],  # strip out the restraints
                [],  # no need to evaluate Us for the endpoint correction
            )
        )

    run_locally = self.client is None
    futures = []
    for args in all_args:
        if run_locally:
            futures.append(_MockFuture(estimator_abfe.simulate(*args)))
        else:
            futures.append(self.client.submit(estimator_abfe.simulate, *args))

    return sys_params, model, futures
def main(args, stage):
    """
    Run one stage of the relative free energy example in a 4.0 water box.

    Parameters
    ----------
    args:
        Parsed command line arguments; only ``args.num_gpus`` is read.
    stage: int
        0 or 1, selecting ``rbfe.stage_0`` or ``rbfe.stage_1`` and the
        matching lambda schedule.

    Raises
    ------
    ValueError
        If stage is neither 0 nor 1.

    For 10 epochs, every lambda window is simulated through a process pool
    and the trapezoidal integral of the averaged du/dl values is printed.
    """
    # NOTE: despite the variable names, both ligands are currently built from
    # the SAME SMILES (naphthalene). The original benzene/phenol pair:
    # benzene = Chem.AddHs(Chem.MolFromSmiles("c1ccccc1")) # a
    # phenol = Chem.AddHs(Chem.MolFromSmiles("Oc1ccccc1")) # b
    benzene = Chem.AddHs(Chem.MolFromSmiles("C1=CC=C2C=CC=CC2=C1"))  # a
    phenol = Chem.AddHs(Chem.MolFromSmiles("C1=CC=C2C=CC=CC2=C1"))  # b

    AllChem.EmbedMolecule(benzene)
    AllChem.EmbedMolecule(phenol)

    # close the forcefield file deterministically instead of leaking the handle
    with open('ff/params/smirnoff_1_1_0_ccc.py') as ifs:
        ff_handlers = deserialize_handlers(ifs.read())

    r_benzene = Recipe.from_rdkit(benzene, ff_handlers)
    r_phenol = Recipe.from_rdkit(phenol, ff_handlers)

    r_combined = r_benzene.combine(r_phenol)

    # atoms 0..9 of ligand a are paired with atoms 0..9 of ligand b;
    # the second column is then shifted into combined-ligand numbering
    core_pairs = np.array([[i, i] for i in range(10)], dtype=np.int32)
    core_pairs[:, 1] += benzene.GetNumAtoms()

    a_idxs = np.arange(benzene.GetNumAtoms())
    b_idxs = np.arange(phenol.GetNumAtoms()) + benzene.GetNumAtoms()

    core_k = 20.0

    if stage == 0:
        centroid_k = 200.0
        rbfe.stage_0(r_combined, b_idxs, core_pairs, centroid_k, core_k)
        lambda_schedule = np.array([0.0, 0.0, 0.0, 0.0, 0.0])
    elif stage == 1:
        rbfe.stage_1(r_combined, a_idxs, b_idxs, core_pairs, core_k)
        lambda_schedule = np.linspace(0.0, 1.2, 60)
    else:
        # an `assert 0` here would disappear under `python -O`
        raise ValueError(f"stage must be 0 or 1, got {stage!r}")

    system, host_coords, box, topology = builders.build_water_system(4.0)

    r_host = Recipe.from_openmm(system)
    r_final = r_host.combine(r_combined)

    # minimize coordinates of host + ligand A
    ha_coords = np.concatenate([host_coords, get_romol_conf(benzene)])

    pool = Pool(args.num_gpus)
    try:
        # we need to run this in a subprocess since the cuda runtime
        # must not be initialized in the master thread due to lack of
        # fork safety
        r_minimize = minimize_setup(r_host, r_benzene)
        ha_coords = pool.map(
            minimize,
            [(r_minimize.bound_potentials, r_minimize.masses, ha_coords, box)],
            chunksize=1,
        )
    finally:
        pool.close()
    # pool.map returns a list with one entry per job
    ha_coords = ha_coords[0]

    pool = Pool(args.num_gpus)
    try:
        x0 = np.concatenate([ha_coords, get_romol_conf(phenol)])

        masses = np.concatenate([r_host.masses, r_benzene.masses, r_phenol.masses])

        seed = np.random.randint(np.iinfo(np.int32).max)

        intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, masses, seed)

        # production run at various values of lambda
        for epoch in range(10):
            # windows are assigned to GPUs round-robin via lamb_idx % num_gpus
            run_args = [
                (
                    lamb,
                    intg,
                    r_final.bound_potentials,
                    r_final.masses,
                    x0,
                    box,
                    lamb_idx % args.num_gpus,
                    stage,
                )
                for lamb_idx, lamb in enumerate(lambda_schedule)
            ]
            avg_du_dls = pool.map(run, run_args, chunksize=1)

            print("stage", stage, "epoch", epoch, "dG", np.trapz(avg_du_dls, lambda_schedule))
    finally:
        # the production pool was previously never closed
        pool.close()
def run_leg(
    orig_host_coords,
    orig_guest_coords,
    combined_bps,
    combined_masses,
    host_box,
    guest_name,
    leg_type,
    host_mol,
    guest_mol,
    outdir,
    fewer_outfiles=False,
    no_outfiles=False,
):
    """
    Insert the guest, equilibrate, then equilibrate further while starting
    deletion jobs.

    Phases, all driven through one context:
      1. insertion: one step per value of a lambda schedule going from
         INSERTION_MAX_LAMBDA to MIN_LAMBDA over TRANSITION_STEPS values
      2. EQ1_STEPS steps at MIN_LAMBDA
      3. EQ2_STEPS steps at MIN_LAMBDA; every 1000 steps ``do_deletion`` is
         called with the current positions and velocities

    Frames for phases 1 and 2 are written only when neither fewer_outfiles
    nor no_outfiles is set; phase 3 frames are written unless no_outfiles is
    set. The function returns None early whenever ``report.too_much_force``
    flags the system.
    """
    num_host = len(orig_host_coords)
    stage_prefix = leg_type.upper()

    x0 = np.concatenate([orig_host_coords, orig_guest_coords])
    v0 = np.zeros_like(x0)

    print(
        f"{stage_prefix}_SYSTEM",
        f"guest_name: {guest_name}",
        f"num_atoms: {len(x0)}",
    )

    seed = 2021
    intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, combined_masses, seed).impl()
    u_impls = [bp.bound_impl(precision=np.float32) for bp in combined_bps]
    ctxt = custom_ops.Context(x0, v0, host_box, intg, u_impls)

    def _report(step, lamb, total_steps, stage_name):
        # Forward the current state to report.report_step.
        report.report_step(
            ctxt,
            step,
            lamb,
            host_box,
            combined_bps,
            u_impls,
            guest_name,
            total_steps,
            stage_name,
        )

    def _write(step, total_steps, tag):
        # Split current coordinates into host / guest parts (scaled by 10)
        # and write one frame.
        host_coords = ctxt.get_x_t()[:num_host] * 10
        guest_coords = ctxt.get_x_t()[num_host:] * 10
        report.write_frame(
            host_coords,
            host_mol,
            guest_coords,
            guest_mol,
            guest_name,
            outdir,
            str(step).zfill(len(str(total_steps))),
            tag,
        )

    def _blown_up(lamb):
        # True when report.too_much_force flags the current state.
        return report.too_much_force(ctxt, lamb, host_box, combined_bps, u_impls)

    write_early_frames = not (fewer_outfiles or no_outfiles)

    # insert guest
    insertion_lambda_schedule = np.linspace(
        INSERTION_MAX_LAMBDA, MIN_LAMBDA, TRANSITION_STEPS
    )
    insertion_checks = (0, int(TRANSITION_STEPS / 2), TRANSITION_STEPS - 1)
    for step, lamb in enumerate(insertion_lambda_schedule):
        ctxt.step(lamb)
        if step % 100 == 0:
            _report(step, lamb, TRANSITION_STEPS, f"{stage_prefix}_INSERTION")
            if write_early_frames:
                _write(step, TRANSITION_STEPS, f"{leg_type}-ins")
        # force sanity check at the first, middle and last step
        if step in insertion_checks and _blown_up(lamb):
            return

    # equilibrate
    eq1_checks = (0, int(EQ1_STEPS / 2), EQ1_STEPS - 1)
    for step in range(EQ1_STEPS):
        ctxt.step(MIN_LAMBDA)
        if step % 1000 == 0:
            _report(step, MIN_LAMBDA, EQ1_STEPS, f"{stage_prefix}_EQUILIBRATION_1")
            if write_early_frames:
                _write(step, EQ1_STEPS, f"{leg_type}-eq1")
        if step in eq1_checks and _blown_up(MIN_LAMBDA):
            return

    # equilibrate more & shoot off deletion jobs
    for step in range(EQ2_STEPS):
        ctxt.step(MIN_LAMBDA)
        if step % 1000 != 0:
            continue

        _report(step, MIN_LAMBDA, EQ2_STEPS, f"{stage_prefix}_EQUILIBRATION_2")

        # TODO: if guest has undocked, stop simulation
        if not no_outfiles:
            _write(step, EQ2_STEPS, f"{leg_type}-eq2")
        if _blown_up(MIN_LAMBDA):
            return

        do_deletion(
            ctxt.get_x_t(),
            ctxt.get_v_t(),
            combined_bps,
            combined_masses,
            host_box,
            guest_name,
            leg_type,
        )
def benchmark(
    label,
    masses,
    lamb,
    x0,
    v0,
    box,
    bound_potentials,
    hmr=False,
    verbose=True,
    num_batches=100,
    steps_per_batch=1000,
    compute_du_dl_interval=0,
    barostat_interval=0,
):
    """
    Time MD stepping at a fixed lambda and print the throughput.

    One untimed warm-up batch is run first; then ``num_batches`` batches of
    ``steps_per_batch`` steps are timed individually. The reported speed is
    based on the mean batch time.

    Parameters
    ----------
    label: str
        Tag printed in the summary line.
    masses:
        Atom masses; rescaled with apply_hmr when ``hmr`` is set.
    lamb: float
        Lambda value used for every step.
    x0, v0, box:
        Initial coordinates, velocities and box handed to the context.
    bound_potentials: list
        Bound potentials; the first one is used to build the bond list.
    hmr: bool
        If True, apply HMR to the masses and use dt = 2.5e-3 instead of 1.5e-3.
    verbose: bool
        If True, print the running throughput after every batch.
    num_batches: int
        Number of timed batches. With 0 batches the summary reports nan.
    steps_per_batch: int
        Steps per batch.
    compute_du_dl_interval: int
        Forwarded as the second argument of ``ctxt.multiple_steps``.
    barostat_interval: int
        If > 0, a MonteCarloBarostat with this interval is attached.

    TODO: configuration blob containing num_batches, steps_per_batch, and any other options
    """
    seed = 1234
    dt = 1.5e-3
    temperature = 300
    pressure = 1.0
    seconds_per_day = 86400

    harmonic_bond_potential = bound_potentials[0]
    bond_list = get_bond_list(harmonic_bond_potential)
    if hmr:
        dt = 2.5e-3
        masses = apply_hmr(masses, bond_list)
    intg = LangevinIntegrator(temperature, dt, 1.0, np.array(masses), seed).impl()

    # get the bound implementations
    bps = [potential.bound_impl(precision=np.float32) for potential in bound_potentials]

    baro_impl = None
    if barostat_interval > 0:
        group_idxs = get_group_indices(bond_list)
        baro = MonteCarloBarostat(
            x0.shape[0],
            pressure,
            temperature,
            group_idxs,
            barostat_interval,
            seed,
        )
        baro_impl = baro.impl(bps)

    ctxt = custom_ops.Context(
        x0,
        v0,
        box,
        intg,
        bps,
        barostat=baro_impl,
    )

    batch_times = []

    lambda_schedule = np.ones(steps_per_batch) * lamb

    # run once before timer starts
    ctxt.multiple_steps(lambda_schedule, compute_du_dl_interval)

    # Pre-set so the summary line below works even when num_batches == 0
    # (it used to fail with a NameError in that case).
    ns_per_day = float("nan")

    start = time.time()

    for _ in range(num_batches):
        # time the current batch
        batch_start = time.time()
        ctxt.multiple_steps(lambda_schedule, compute_du_dl_interval)
        batch_end = time.time()

        batch_times.append(batch_end - batch_start)

        # running estimate over all batches timed so far
        steps_per_second = steps_per_batch / np.mean(batch_times)
        steps_per_day = steps_per_second * seconds_per_day

        ps_per_day = dt * steps_per_day
        ns_per_day = ps_per_day * 1e-3

        if verbose:
            print(f"steps per second: {steps_per_second:.3f}")
            print(f"ns per day: {ns_per_day:.3f}")

    # crude blow-up check on the final coordinates
    assert np.all(np.abs(ctxt.get_x_t()) < 1000)

    print(
        f"{label}: N={x0.shape[0]} speed: {ns_per_day:.2f}ns/day dt: {dt*1e3}fs (ran {steps_per_batch * num_batches} steps in {(time.time() - start):.2f}s)"
    )
complex_coords, ff, complex_box) afe = AbsoluteFreeEnergy(mol_a, ff) unbound_potentials, sys_params, masses, coords = afe.prepare_host_edge( ff.get_ordered_params(), complex_system, min_complex_coords) # define NPT ensemble potential_energy_model = PotentialEnergyModel(sys_params, unbound_potentials) ensemble = NPTEnsemble(potential_energy_model, temperature, pressure) # define a thermostat integrator = LangevinIntegrator( temperature.value_in_unit(unit.kelvin), timestep.value_in_unit(unit.picosecond), collision_rate.value_in_unit(unit.picosecond**-1), masses, seed, ) integrator_impl = integrator.impl() def reduced_potential_fxn(x, box, lam): u, du_dx = ensemble.reduced_potential_and_gradient(x, box, lam) return u # get list of molecules for barostat by looking at bond table harmonic_bond_potential = unbound_potentials[0] bond_list = get_bond_list(harmonic_bond_potential) group_indices = get_group_indices(bond_list) trajs = []
# out_dir = os.path.join(epoch_dir, "mol_"+mol.GetProp("_Name"))\ # if not os.path.exists(out_dir): # os.makedirs(out_dir) # safety guard try: potentials, masses, vjp_fns = hydration_setup.combine_potentials( ff_handlers, mol, host_system, precision=np.float32) coords = hydration_setup.combine_coordinates(host_coords, mol) seed = np.random.randint(0, np.iinfo(np.int32).max) intg = LangevinIntegrator(float(intg_cfg["temperature"]), float(intg_cfg["dt"]), float(intg_cfg["friction"]), masses, seed) sim = simulation.Simulation(coords, np.zeros_like(coords), box, potentials, intg) (pred_dG, pred_err), grad_dG, du_dls = hydration_model.simulate( sim, num_steps, lambda_schedule, stubs) plt.plot(lambda_schedule, du_dls) plt.ylabel("du_dlambda") plt.xlabel("lambda") plt.savefig( os.path.join(epoch_dir, "ti_mol_" + mol.GetProp("_Name"))) plt.clf()
# note: lambda goes from 0 to 1, 0 being fully-interacting and 1.0 being fully interacting. for lamb_idx, final_lamb in enumerate(np.linspace(1, 0, 8)): # write some conformations into this PDB file writer = pdb_writer.PDBWriter([omm_topology, romol_a, romol_b], "debug_" + str(lamb_idx) + ".pdb") seed = 2020 # note: the .impl() call at the end returns a pickle-able version of the # wrapper function -- since contexts are not pickle-able -- which will # be useful later in timemachine's multi-device parallelization strategy) # note: OpenMM unit system used throughout # (temperature: kelvin, timestep: picosecond, collision_rate: picosecond^-1) intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, combined_masses, seed).impl() x0 = combined_coords v0 = np.zeros_like(x0) u_impls = [] for bp in final_potentials: u_impls.append(bp.bound_impl(np.float32)) # context components: positions, velocities, box, integrator, energy fxns ctxt = custom_ops.Context(x0, v0, box, intg, u_impls) for step, lamb in enumerate(np.linspace(1.0, final_lamb, 1000)): if step % 500 == 0: writer.write_frame(ctxt.get_x_t() * 10)
def test_barostat_partial_group_idxs():
    """Smoke-test the Monte Carlo barostat with only part of the molecule groups.

    The barostat is handed just the second half of the groups derived from the
    harmonic-bond table rather than every molecule. The test passes as long as
    the simulation steps run; nothing is asserted about the resulting behavior.
    """
    seed = 2021
    np.random.seed(seed)

    # Run settings; unit-tagged quantities are converted right where they are used.
    barostat_interval = 3
    lam = 1.0
    initial_waterbox_width = 3.0 * unit.nanometer
    temperature = 300.0 * unit.kelvin
    pressure = 1.0 * unit.atmosphere
    timestep = 1.5 * unit.femtosecond
    collision_rate = 1.0 / unit.picosecond

    ligand = hif2a_ligand_pair.mol_a
    forcefield = hif2a_ligand_pair.ff

    # Build the water box and minimize it together with the ligand.
    host_system, host_coords, host_box, _host_top = build_water_system(
        initial_waterbox_width.value_in_unit(unit.nanometer)
    )
    minimized_coords = minimize_host_4d(
        [ligand], host_system, host_coords, forcefield, host_box
    )

    edge = AbsoluteFreeEnergy(ligand, forcefield)
    unbound_potentials, sys_params, masses, coords = edge.prepare_host_edge(
        forcefield.get_ordered_params(), host_system, minimized_coords
    )

    # The first potential is used as the harmonic bond term; its bond table
    # yields the molecule groups for the barostat.
    all_groups = get_group_indices(get_bond_list(unbound_potentials[0]))
    # Keep only the second half of the groups.
    partial_groups = all_groups[len(all_groups) // 2:]

    # Bind every potential first, then create the float32 implementations.
    bound_potentials = [
        potential.bind(np.asarray(params))
        for params, potential in zip(sys_params, unbound_potentials)
    ]
    potential_impls = [
        bound.bound_impl(precision=np.float32) for bound in bound_potentials
    ]

    thermostat = LangevinIntegrator(
        temperature.value_in_unit(unit.kelvin),
        timestep.value_in_unit(unit.picosecond),
        collision_rate.value_in_unit(unit.picosecond**-1),
        masses,
        seed,
    )
    thermostat_impl = thermostat.impl()

    velocities = sample_velocities(masses * unit.amu, temperature)

    barostat = custom_ops.MonteCarloBarostat(
        coords.shape[0],
        pressure.value_in_unit(unit.bar),
        temperature.value_in_unit(unit.kelvin),
        partial_groups,
        barostat_interval,
        potential_impls,
        seed,
    )

    context = custom_ops.Context(
        coords,
        velocities,
        host_box,
        thermostat_impl,
        potential_impls,
        barostat=barostat,
    )
    # 1000 steps, all at the same lambda value.
    context.multiple_steps(np.ones(1000) * lam)
mol, host_system, precision=np.float32 ) coords = hydration_setup.combine_coordinates( host_coords, mol ) seed = np.random.randint(0, np.iinfo(np.int32).max) intg = LangevinIntegrator( float(intg_cfg['temperature']), float(intg_cfg['dt']), float(intg_cfg['friction']), masses, seed ) sim = simulation.Simulation( coords, np.zeros_like(coords), box, potentials, intg ) (pred_dG, pred_err), grad_dG, du_dls = hydration_model.simulate( sim, num_steps,