def test_from_pdb(self):
    """Deserialize a host system built from a PDB file and count its energy terms.

    The host is parameterized with the 'amber99sbildn.xml' and
    'amber99_obc.xml' force field files, without a cutoff and without
    constraints, and the deserialized result must expose exactly six keys.
    """
    pdb_path = "tests/hif2a_nowater_min.pdb"
    pdb = app.PDBFile(pdb_path)
    forcefield = app.ForceField('amber99sbildn.xml', 'amber99_obc.xml')
    omm_system = forcefield.createSystem(
        pdb.topology,
        nonbondedMethod=app.NoCutoff,
        constraints=None,
        rigidWater=False,
    )

    # the masses are returned as well but are not inspected by this test
    nrg_fns, _ = openmm_deserializer.deserialize_system(omm_system)

    assert len(nrg_fns.keys()) == 6
def from_openmm(cls, omm_system):
    """
    Build an instance of this class from an OpenMM System.

    Parameters
    ----------
    omm_system: openmm.System
        OpenMM system object to deserialize (a cutoff of 1.0 is passed to
        the deserializer)

    Returns
    -------
    An instance of ``cls`` constructed from the deserialized masses and
    bound potentials, in that order.
    """
    deserialized = openmm_deserializer.deserialize_system(omm_system, cutoff=1.0)
    # the deserializer returns (potentials, masses); the constructor takes
    # them in the opposite order
    bound_potentials, masses = deserialized
    return cls(masses, bound_potentials)
def dock_and_equilibrate(host_pdbfile,
                         guests_sdfile,
                         max_lambda,
                         insertion_steps,
                         eq_steps,
                         outdir,
                         fewer_outfiles=False,
                         constant_atoms=None):
    """Solvates a host, inserts guest(s) into solvated host, equilibrates

    Parameters
    ----------
    host_pdbfile: path to host pdb file to dock into
    guests_sdfile: path to input sdf with guests to pose/dock
    max_lambda: lambda value the guest should insert from or delete to
        (recommended: 1.0 for work calculation, 0.25 to stay close to original pose)
        (must be =1 for work calculation to be applicable)
    insertion_steps: how many steps to insert the guest over (recommended: 501)
    eq_steps: how many steps of equilibration to do after insertion (recommended: 15001)
    outdir: where to write output (will be created if it does not already exist)
    fewer_outfiles: if True, will only write frames for the equilibration, not insertion
    constant_atoms: atom numbers from the host_pdbfile to hold mostly fixed across the simulation
        (1-indexed, like PDB files). None (the default) means no atoms are held.

    Output
    ------
    A pdb & sdf file every 100 steps of insertion (outdir/<guest_name>/<guest_name>_<step>.[pdb/sdf])
    A pdb & sdf file every 1000 steps of equilibration (outdir/<guest_name>/<guest_name>_<step>.[pdb/sdf])
    stdout every 100(0) steps noting the step number, lambda value, and energy
    stdout for each guest noting the work of transition
    stdout for each guest noting how long it took to run

    Note
    ----
    If any norm of force per atom exceeds 20000 kJ/(mol*nm) [MAX_NORM_FORCE defined in docking/report.py],
    the simulation for that guest will stop and the work will not be calculated.
    """
    # a None default avoids sharing one mutable list object between calls
    if constant_atoms is None:
        constant_atoms = []

    # exist_ok avoids the race between checking for the directory and creating it
    os.makedirs(outdir, exist_ok=True)

    print(f"""
    HOST_PDBFILE = {host_pdbfile}
    GUESTS_SDFILE = {guests_sdfile}
    OUTDIR = {outdir}
    MAX_LAMBDA = {max_lambda}
    INSERTION_STEPS = {insertion_steps}
    EQ_STEPS = {eq_steps}
    """)

    # Prepare host
    # TODO: handle extra (non-transitioning) guests?

    print("Solvating host...")
    # TODO: return topology from builders.build_protein_system
    (
        solvated_host_system,
        solvated_host_coords,
        _,
        _,
        host_box,
        solvated_topology,
    ) = builders.build_protein_system(host_pdbfile)

    # sometimes water boxes are sad. Should be minimized first; this is a workaround
    host_box += np.eye(3) * 0.1
    print("host box", host_box)

    # round-trip the solvated host through a temporary PDB file to get an RDKit mol
    solvated_host_pdb = os.path.join(outdir, "solvated_host.pdb")
    writer = pdb_writer.PDBWriter([solvated_topology], solvated_host_pdb)
    writer.write_frame(solvated_host_coords)
    writer.close()
    solvated_host_mol = Chem.MolFromPDBFile(solvated_host_pdb, removeHs=False)
    os.remove(solvated_host_pdb)

    final_host_potentials = []
    host_potentials, host_masses = openmm_deserializer.deserialize_system(
        solvated_host_system, cutoff=1.2)
    host_nb_bp = None
    for bp in host_potentials:
        if isinstance(bp, potentials.Nonbonded):
            # (ytz): hack to ensure we only have one nonbonded term
            assert host_nb_bp is None
            host_nb_bp = bp
        else:
            final_host_potentials.append(bp)

    # the force field path does not depend on the guest, so build it once
    ff_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        "..",
        "ff/params/smirnoff_1_1_0_ccc.py",
    )
    num_host_atoms = len(solvated_host_coords)

    # Run the procedure
    print("Getting guests...")
    suppl = Chem.SDMolSupplier(guests_sdfile, removeHs=False)
    for guest_mol in suppl:
        start_time = time.time()
        guest_name = guest_mol.GetProp("_Name")
        guest_conformer = guest_mol.GetConformer(0)
        orig_guest_coords = np.array(guest_conformer.GetPositions(),
                                     dtype=np.float64)
        orig_guest_coords = orig_guest_coords / 10  # convert to md_units

        # read the handlers with a context manager so the file is closed promptly
        with open(ff_path) as ff_file:
            guest_ff_handlers = deserialize_handlers(ff_file.read())
        ff = Forcefield(guest_ff_handlers)
        guest_base_top = topology.BaseTopology(guest_mol, ff)

        # combine host & guest
        hgt = topology.HostGuestTopology(host_nb_bp, guest_base_top)
        # setup the parameter handlers for the ligand
        bonded_tuples = [
            [hgt.parameterize_harmonic_bond, ff.hb_handle],
            [hgt.parameterize_harmonic_angle, ff.ha_handle],
            [hgt.parameterize_proper_torsion, ff.pt_handle],
            [hgt.parameterize_improper_torsion, ff.it_handle],
        ]
        combined_bps = list(final_host_potentials)
        # instantiate the vjps while parameterizing (forward pass)
        for fn, handle in bonded_tuples:
            params, potential = fn(handle.params)
            combined_bps.append(potential.bind(params))
        nb_params, nb_potential = hgt.parameterize_nonbonded(
            ff.q_handle.params, ff.lj_handle.params)
        combined_bps.append(nb_potential.bind(nb_params))
        guest_masses = [a.GetMass() for a in guest_mol.GetAtoms()]
        combined_masses = np.concatenate([host_masses, guest_masses])

        x0 = np.concatenate([solvated_host_coords, orig_guest_coords])
        v0 = np.zeros_like(x0)
        print(
            "SYSTEM",
            f"guest_name: {guest_name}",
            f"num_atoms: {len(x0)}",
        )

        # a large extra mass keeps the selected (1-indexed) atoms mostly fixed
        for atom_num in constant_atoms:
            combined_masses[atom_num - 1] += 50000

        seed = 2021
        intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, combined_masses,
                                  seed).impl()

        u_impls = [bp.bound_impl(precision=np.float32) for bp in combined_bps]

        ctxt = custom_ops.Context(x0, v0, host_box, intg, u_impls)

        # collect a du_dl calculation once every other step
        subsample_freq = 2
        du_dl_obs = custom_ops.FullPartialUPartialLambda(
            u_impls, subsample_freq)
        ctxt.add_observable(du_dl_obs)

        # insert guest
        insertion_lambda_schedule = np.linspace(max_lambda, 0.0,
                                                insertion_steps)
        calc_work = True
        for step, lamb in enumerate(insertion_lambda_schedule):
            ctxt.step(lamb)
            if step % 100 == 0:
                report.report_step(ctxt, step, lamb, host_box, combined_bps,
                                   u_impls, guest_name, insertion_steps,
                                   "INSERTION")
                if not fewer_outfiles:
                    # fetch the coordinates once and split into host / guest;
                    # the factor of 10 undoes the md_units conversion above
                    x_t = ctxt.get_x_t()
                    host_coords = x_t[:num_host_atoms] * 10
                    guest_coords = x_t[num_host_atoms:] * 10
                    report.write_frame(
                        host_coords,
                        solvated_host_mol,
                        guest_coords,
                        guest_mol,
                        guest_name,
                        outdir,
                        str(step).zfill(len(str(insertion_steps))),
                        "ins",
                    )
            # the force check only runs at the first, middle and last step
            if step in (0, int(insertion_steps / 2), insertion_steps - 1):
                if report.too_much_force(ctxt, lamb, host_box, combined_bps,
                                         u_impls):
                    calc_work = False
                    break

        full_du_dl = du_dl_obs.full_du_dl()
        # Note: this condition only applies for ABFE, not RBFE
        if abs(full_du_dl[0]) > 0.001 or abs(full_du_dl[-1]) > 0.001:
            print("Error: du_dl endpoints are not ~0")
            calc_work = False

        if calc_work:
            # du_dl was only sampled every `subsample_freq` steps, so the
            # lambda schedule is subsampled the same way
            work = np.trapz(full_du_dl,
                            insertion_lambda_schedule[::subsample_freq])
            print(f"guest_name: {guest_name}\tinsertion_work: {work:.2f}")

        # equilibrate
        for step in range(eq_steps):
            ctxt.step(0.00)
            if step % 1000 == 0:
                report.report_step(ctxt, step, 0.00, host_box, combined_bps,
                                   u_impls, guest_name, eq_steps,
                                   'EQUILIBRATION')
                x_t = ctxt.get_x_t()
                host_coords = x_t[:num_host_atoms] * 10
                guest_coords = x_t[num_host_atoms:] * 10
                report.write_frame(
                    host_coords,
                    solvated_host_mol,
                    guest_coords,
                    guest_mol,
                    guest_name,
                    outdir,
                    str(step).zfill(len(str(eq_steps))),
                    "eq",
                )
            if step in (0, int(eq_steps / 2), eq_steps - 1):
                if report.too_much_force(ctxt, 0.00, host_box, combined_bps,
                                         u_impls):
                    break

        end_time = time.time()
        print(f"{guest_name} took {(end_time - start_time):.2f} seconds")
def benchmark_dhfr():
    """Run a fixed-length simulation of the DHFR test system and report throughput.

    The system is loaded from 'tests/data/5dfr_solv_equil.pdb', every
    deserialized potential is bound at float32 precision, and one
    AvgPartialUPartialParam observable is attached per potential. Every
    1000 steps the running ns/day estimate is printed; at the end the total
    wall time and the averaged du_dp of each potential are printed.
    """
    pdb = app.PDBFile('tests/data/5dfr_solv_equil.pdb')
    forcefield = app.ForceField('amber99sbildn.xml', 'tip3p.xml')
    omm_system = forcefield.createSystem(
        pdb.topology,
        nonbondedMethod=app.NoCutoff,
        constraints=None,
        rigidWater=False,
    )

    positions = pdb.positions
    box_vectors = pdb.topology.getPeriodicBoxVectors()
    # strip the units from the periodic box vectors
    box = np.asarray(box_vectors / box_vectors.unit)

    host_fns, host_masses = openmm_deserializer.deserialize_system(
        omm_system, cutoff=1.0)

    host_conf = np.array([
        [to_md_units(x), to_md_units(y), to_md_units(z)]
        for x, y, z in positions
    ])

    seed = 1234
    dt = 1.5e-3
    intg = LangevinIntegrator(300, dt, 1.0, np.array(host_masses), seed).impl()

    # get the bound implementation of every potential
    bps = [fn.bound_impl(precision=np.float32) for fn in host_fns]

    x0 = host_conf
    v0 = np.zeros_like(host_conf)
    ctxt = custom_ops.Context(x0, v0, box, intg, bps)

    # initialize observables: one averaged du_dp observable per potential
    obs = []
    for impl in bps:
        observable = custom_ops.AvgPartialUPartialParam(impl, 100)
        ctxt.add_observable(observable)
        obs.append(observable)

    lamb = 0.0
    start = time.time()
    num_steps = 50000

    # frame output is currently disabled, so the writer is only opened and closed
    writer = PDBWriter([pdb.topology], "dhfr.pdb")

    for step in range(num_steps):
        ctxt.step(lamb)
        if step % 1000 != 0:
            continue
        elapsed = time.time() - start
        # steps/second -> steps/day -> ps/day (via dt) -> ns/day
        ns_per_day = dt * (step / elapsed * 86400) * 1e-3
        print(step, "ns/day", ns_per_day)

    print("total time", time.time() - start)
    writer.close()

    # bond angle torsions nonbonded
    for potential, observable in zip(host_fns, obs):
        dp = observable.avg_du_dp()
        print(potential, dp.shape)
        print(dp)
def test_benchmark(self):
    """Evaluate nonbonded du_dx 100 times on the full DHFR test system.

    The Nonbonded potential is pulled out of the deserialized host system,
    rebuilt with all-zero lambda plane/offset indices, and its float32
    unbound implementation is executed repeatedly at lambda 0.0.
    """
    pdb = app.PDBFile('tests/data/5dfr_solv_equil.pdb')
    forcefield = app.ForceField('amber99sbildn.xml', 'tip3p.xml')
    omm_system = forcefield.createSystem(
        pdb.topology,
        nonbondedMethod=app.NoCutoff,
        constraints=None,
        rigidWater=False,
    )

    positions = pdb.positions
    box_vectors = pdb.topology.getPeriodicBoxVectors()
    box = np.asarray(box_vectors / box_vectors.unit)

    host_fns, host_masses = openmm_deserializer.deserialize_system(
        omm_system, cutoff=1.0)

    # keep the last Nonbonded potential among the deserialized terms
    for fn in host_fns:
        if isinstance(fn, potentials.Nonbonded):
            nonbonded_fn = fn

    host_conf = np.array([
        [to_md_units(x), to_md_units(y), to_md_units(z)]
        for x, y, z in positions
    ])

    beta = 2.0
    cutoff = 1.1
    lamb = 0.0

    N = host_conf.shape[0]
    test_conf = host_conf[:N]

    # process exclusions: drop any pair that refers to an atom index >= N
    test_exclusions = []
    test_scales = []
    exclusion_pairs = zip(nonbonded_fn.get_exclusion_idxs(),
                          nonbonded_fn.get_scale_factors())
    for (i, j), (sa, sb) in exclusion_pairs:
        if i >= N or j >= N:
            continue
        test_exclusions.append((i, j))
        test_scales.append((sa, sb))
    test_exclusions = np.array(test_exclusions, dtype=np.int32)
    test_scales = np.array(test_scales, dtype=np.float64)

    test_params = nonbonded_fn.params[:N, :]
    test_lambda_plane_idxs = np.zeros(N, dtype=np.int32)
    test_lambda_offset_idxs = np.zeros(N, dtype=np.int32)

    test_nonbonded_fn = potentials.Nonbonded(
        test_exclusions,
        test_scales,
        test_lambda_plane_idxs,
        test_lambda_offset_idxs,
        beta,
        cutoff,
    )

    precision = np.float32
    impl = test_nonbonded_fn.unbound_impl(precision)

    for _ in range(100):
        impl.execute_du_dx(test_conf, test_params, box, lamb)
def test_dhfr(self):
    """Compare nonbonded forces against the reference on DHFR subsets.

    For each subset size N, the first N atoms of the DHFR test system are
    used with the exclusions restricted to those atoms and with random
    lambda plane/offset indices drawn from [-2, 2). The test potential is
    compared to the reference potential at float64 (rtol 1e-8) and float32
    (rtol 1e-4) precision via ``self.compare_forces``.
    """
    pdb = app.PDBFile('tests/data/5dfr_solv_equil.pdb')
    forcefield = app.ForceField('amber99sbildn.xml', 'tip3p.xml')
    omm_system = forcefield.createSystem(
        pdb.topology,
        nonbondedMethod=app.NoCutoff,
        constraints=None,
        rigidWater=False,
    )

    positions = pdb.positions
    box_vectors = pdb.topology.getPeriodicBoxVectors()
    box = np.asarray(box_vectors / box_vectors.unit)

    host_fns, host_masses = openmm_deserializer.deserialize_system(
        omm_system, cutoff=1.0)

    # keep the last Nonbonded potential among the deserialized terms
    for fn in host_fns:
        if isinstance(fn, potentials.Nonbonded):
            nonbonded_fn = fn

    host_conf = np.array([
        [to_md_units(x), to_md_units(y), to_md_units(z)]
        for x, y, z in positions
    ])

    beta = 2.0
    cutoff = 1.1
    lamb = 0.1

    max_N = host_conf.shape[0]

    tolerances = [(np.float64, 1e-8), (np.float32, 1e-4)]

    for N in [33, 65, 231, 1050, 4080]:
        print("N", N)

        test_conf = host_conf[:N]

        # process exclusions: drop any pair that refers to an atom index >= N
        test_exclusions = []
        test_scales = []
        exclusion_pairs = zip(nonbonded_fn.get_exclusion_idxs(),
                              nonbonded_fn.get_scale_factors())
        for (i, j), (sa, sb) in exclusion_pairs:
            if i >= N or j >= N:
                continue
            test_exclusions.append((i, j))
            test_scales.append((sa, sb))
        test_exclusions = np.array(test_exclusions, dtype=np.int32)
        test_scales = np.array(test_scales, dtype=np.float64)

        test_params = nonbonded_fn.params[:N, :]

        # plane indices are drawn before offset indices
        test_lambda_plane_idxs = np.random.randint(
            low=-2, high=2, size=N, dtype=np.int32)
        test_lambda_offset_idxs = np.random.randint(
            low=-2, high=2, size=N, dtype=np.int32)

        test_nonbonded_fn = potentials.Nonbonded(
            test_exclusions,
            test_scales,
            test_lambda_plane_idxs,
            test_lambda_offset_idxs,
            beta,
            cutoff,
        )

        ref_nonbonded_fn = prepare_reference_nonbonded(
            test_params,
            test_exclusions,
            test_scales,
            test_lambda_plane_idxs,
            test_lambda_offset_idxs,
            beta,
            cutoff,
        )

        for precision, rtol in tolerances:
            self.compare_forces(
                test_conf,
                test_params,
                box,
                lamb,
                ref_nonbonded_fn,
                test_nonbonded_fn,
                rtol,
                precision=precision,
            )
def pose_dock(
    guests_sdfile,
    host_pdbfile,
    transition_type,
    n_steps,
    transition_steps,
    max_lambda,
    outdir,
    random_rotation=False,
    constant_atoms=None,
):
    """Runs short simulations in which the guests phase in or out over time

    Parameters
    ----------
    guests_sdfile: path to input sdf with guests to pose/dock
    host_pdbfile: path to host pdb file to dock into
    transition_type: "insertion" or "deletion"
    n_steps: how many total steps of simulation to do (recommended: <= 1000)
    transition_steps: how many steps to insert/delete the guest over (recommended: <= 500)
        (must be <= n_steps)
    max_lambda: lambda value the guest should insert from or delete to
        (recommended: 1.0 for work calculation, 0.25 to stay close to original pose)
        (must be =1 for work calculation to be applicable)
    outdir: where to write output (will be created if it does not already exist)
    random_rotation: whether to apply a random rotation to each guest before inserting
        (only allowed when transition_type is "insertion")
    constant_atoms: atom numbers from the host_pdbfile to hold mostly fixed across the simulation
        (1-indexed, like PDB files). None (the default) means no atoms are held.

    Output
    ------
    A pdb & sdf file every 100 steps (outdir/<guest_name>_<step>.pdb)
    stdout every 100 steps noting the step number, lambda value, and energy
    stdout for each guest noting the work of transition
    stdout for each guest noting how long it took to run

    Note
    ----
    If any norm of force per atom exceeds 20000 kJ/(mol*nm) [MAX_NORM_FORCE defined in docking/report.py],
    the simulation for that guest will stop and the work will not be calculated.
    """
    # a None default avoids sharing one mutable list object between calls
    if constant_atoms is None:
        constant_atoms = []

    assert transition_steps <= n_steps
    assert transition_type in ("insertion", "deletion")
    if random_rotation:
        assert transition_type == "insertion"
        # imported once here instead of on every guest iteration; scipy is
        # still only required when a random rotation is requested
        from scipy.stats import special_ortho_group

    # exist_ok avoids the race between checking for the directory and creating it
    os.makedirs(outdir, exist_ok=True)

    host_mol = Chem.MolFromPDBFile(host_pdbfile, removeHs=False)
    amber_ff = app.ForceField("amber99sbildn.xml", "tip3p.xml")
    host_file = PDBFile(host_pdbfile)
    host_system = amber_ff.createSystem(
        host_file.topology,
        nonbondedMethod=app.NoCutoff,
        constraints=None,
        rigidWater=False,
    )
    host_conf = np.array([
        [to_md_units(x), to_md_units(y), to_md_units(z)]
        for x, y, z in host_file.positions
    ])
    num_host_atoms = len(host_conf)

    final_potentials = []
    host_potentials, host_masses = openmm_deserializer.deserialize_system(
        host_system, cutoff=1.2)
    host_nb_bp = None
    for bp in host_potentials:
        if isinstance(bp, potentials.Nonbonded):
            # (ytz): hack to ensure we only have one nonbonded term
            assert host_nb_bp is None
            host_nb_bp = bp
        else:
            final_potentials.append(bp)

    # TODO (ytz): we should really fix this later on. This padding was done to
    # address the particles that are too close to the boundary.
    padding = 0.1
    box_lengths = np.amax(host_conf, axis=0) - np.amin(host_conf, axis=0)
    box_lengths = box_lengths + padding
    box = np.eye(3, dtype=np.float64) * box_lengths

    # the force field path does not depend on the guest, so build it once
    ff_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        "..",
        "ff/params/smirnoff_1_1_0_ccc.py",
    )

    suppl = Chem.SDMolSupplier(guests_sdfile, removeHs=False)
    for guest_mol in suppl:
        start_time = time.time()
        guest_name = guest_mol.GetProp("_Name")

        # read the handlers with a context manager so the file is closed promptly
        with open(ff_path) as ff_file:
            guest_ff_handlers = deserialize_handlers(ff_file.read())
        ff = Forcefield(guest_ff_handlers)
        guest_base_topology = topology.BaseTopology(guest_mol, ff)

        # combine
        hgt = topology.HostGuestTopology(host_nb_bp, guest_base_topology)
        # setup the parameter handlers for the ligand
        bonded_tuples = [
            [hgt.parameterize_harmonic_bond, ff.hb_handle],
            [hgt.parameterize_harmonic_angle, ff.ha_handle],
            [hgt.parameterize_proper_torsion, ff.pt_handle],
            [hgt.parameterize_improper_torsion, ff.it_handle],
        ]
        these_potentials = list(final_potentials)
        # instantiate the vjps while parameterizing (forward pass)
        for fn, handle in bonded_tuples:
            params, potential = fn(handle.params)
            these_potentials.append(potential.bind(params))
        nb_params, nb_potential = hgt.parameterize_nonbonded(
            ff.q_handle.params, ff.lj_handle.params)
        these_potentials.append(nb_potential.bind(nb_params))
        bps = these_potentials

        guest_masses = [a.GetMass() for a in guest_mol.GetAtoms()]
        masses = np.concatenate([host_masses, guest_masses])

        # a large extra mass keeps the selected (1-indexed) atoms mostly fixed
        for atom_num in constant_atoms:
            masses[atom_num - 1] += 50000

        conformer = guest_mol.GetConformer(0)
        mol_conf = np.array(conformer.GetPositions(), dtype=np.float64)
        mol_conf = mol_conf / 10  # convert to md_units

        if random_rotation:
            # rotate about the guest's mean position by a random 3x3
            # matrix drawn from special_ortho_group
            center = np.mean(mol_conf, axis=0)
            mol_conf -= center
            mol_conf = np.matmul(mol_conf, special_ortho_group.rvs(3))
            mol_conf += center

        x0 = np.concatenate([host_conf, mol_conf])  # combined geometry
        v0 = np.zeros_like(x0)

        seed = 2021
        intg = LangevinIntegrator(300, 1.5e-3, 1.0, masses, seed).impl()

        precision = np.float32
        impls = [b.bound_impl(precision) for b in bps]

        ctxt = custom_ops.Context(x0, v0, box, intg, impls)

        # collect a du_dl calculation once every other step
        subsample_freq = 2
        du_dl_obs = custom_ops.FullPartialUPartialLambda(impls, subsample_freq)
        ctxt.add_observable(du_dl_obs)

        # lambda ramps over `transition_steps`, then is held at its final
        # value for the remaining steps
        if transition_type == "insertion":
            new_lambda_schedule = np.concatenate([
                np.linspace(max_lambda, 0.0, transition_steps),
                np.zeros(n_steps - transition_steps),
            ])
        elif transition_type == "deletion":
            new_lambda_schedule = np.concatenate([
                np.linspace(0.0, max_lambda, transition_steps),
                np.ones(n_steps - transition_steps) * max_lambda,
            ])
        else:
            # still reachable when assertions are disabled (python -O)
            raise RuntimeError(
                'invalid `transition_type` (must be one of ["insertion", "deletion"])'
            )

        calc_work = True
        for step, lamb in enumerate(new_lambda_schedule):
            ctxt.step(lamb)
            if step % 100 == 0:
                report.report_step(ctxt, step, lamb, box, bps, impls,
                                   guest_name, n_steps, 'pose_dock')
                # fetch the coordinates once and split into host / guest;
                # the factor of 10 undoes the md_units conversion above
                x_t = ctxt.get_x_t()
                host_coords = x_t[:num_host_atoms] * 10
                guest_coords = x_t[num_host_atoms:] * 10
                report.write_frame(host_coords, host_mol, guest_coords,
                                   guest_mol, guest_name, outdir, step, 'pd')
            # the force check only runs at the first, middle and last step
            if step in (0, int(n_steps / 2), n_steps - 1):
                if report.too_much_force(ctxt, lamb, box, bps, impls):
                    calc_work = False
                    break

        full_du_dl = du_dl_obs.full_du_dl()
        # Note: this condition only applies for ABFE, not RBFE
        if abs(full_du_dl[0]) > 0.001 or abs(full_du_dl[-1]) > 0.001:
            print("Error: du_dl endpoints are not ~0")
            calc_work = False

        if calc_work:
            # du_dl was only sampled every `subsample_freq` steps, so the
            # lambda schedule is subsampled the same way
            work = np.trapz(full_du_dl,
                            new_lambda_schedule[::subsample_freq])
            print(f"guest_name: {guest_name}\twork: {work:.2f}")

        end_time = time.time()
        print(f"{guest_name} took {(end_time - start_time):.2f} seconds")
# generate conformers
AllChem.EmbedMolecule(romol_a)
AllChem.EmbedMolecule(romol_b)

# extract the 0th conformer
ligand_coords_a = get_romol_conf(romol_a)
ligand_coords_b = get_romol_conf(romol_b)

# construct a 4-nanometer water box (from openmmtools approach: selecting out
# of a large pre-equilibrated water box snapshot)
system, host_coords, box, omm_topology = builders.build_water_system(4.0)
# padding to avoid jank
box = box + np.eye(3) * 0.1

host_bps, host_masses = openmm_deserializer.deserialize_system(system, cutoff=1.2)

combined_masses = np.concatenate(
    [host_masses, ligand_masses_a, ligand_masses_b])

# minimize coordinates

# note: .py file rather than .offxml file
# note: _ccc suffix means "correctable charge corrections"
# the file is read inside a context manager so the handle is closed promptly
with open('ff/params/smirnoff_1_1_0_ccc.py') as ff_file:
    ff_handlers = deserialize_handlers(ff_file.read())
ff = Forcefield(ff_handlers)

# for RHFE we need to insert the reference ligand first, before inserting the
# decoupling ligand
minimized_coords = minimizer.minimize_4d(romol_a, system, host_coords, ff, box)
def create_system(guest_mol, host_pdb, handlers, stage, core_atoms,
                  restr_force, restr_alpha, restr_count):
    """
    Initialize a self-encompassing System object that we can serialize and simulate.

    Parameters
    ----------
    guest_mol: rdkit.ROMol
        guest molecule; its conformer 0 supplies the guest coordinates

    host_pdb: openmm.PDBFile
        host structure; its topology and positions are used

    handlers: list
        forcefield handlers used to parameterize the guest; each must be one
        of the bonded/nonbonded handler types dispatched on below

    stage: int
        0, 1 or 2. Stage 0 leaves all lambda plane/offset indices at zero,
        stage 1 sets the guest lambda offset indices to 1 and stage 2 sets
        the guest lambda plane indices to 1.

    core_atoms, restr_force, restr_alpha, restr_count:
        forwarded unchanged to setup_core_restraints

    Returns
    -------
    4-tuple
        x0, combined_masses, final_gradients, final_vjp_fns

    Raises
    ------
    ValueError
        if stage is not 0, 1 or 2
    Exception
        if an unknown handler type is encountered

    """
    # fail fast: an unsupported stage would otherwise surface much later as a
    # NameError on the undefined lambda index arrays
    if stage not in (0, 1, 2):
        raise ValueError(f"stage must be 0, 1 or 2, got {stage!r}")

    guest_masses = np.array([a.GetMass() for a in guest_mol.GetAtoms()],
                            dtype=np.float64)

    amber_ff = app.ForceField('amber99sbildn.xml', 'amber99_obc.xml')
    host_system = amber_ff.createSystem(host_pdb.topology,
                                        nonbondedMethod=app.NoCutoff,
                                        constraints=None,
                                        rigidWater=False)

    host_fns, host_masses = openmm_deserializer.deserialize_system(host_system)

    num_host_atoms = len(host_masses)
    num_guest_atoms = guest_mol.GetNumAtoms()

    # Name, Args, vjp_fn
    final_gradients = []
    final_vjp_fns = []

    # host nonbonded pieces are held back so they can be merged with the
    # guest's; every other host term is passed through without a vjp
    for item in host_fns:
        if item[0] == 'LennardJones':
            host_lj_params = item[1]
        elif item[0] == 'Charges':
            host_charge_params = item[1]
        elif item[0] == 'GBSA':
            host_gb_params = item[1][0]
            host_gb_props = item[1][1:]
        elif item[0] == 'Exclusions':
            host_exclusions = item[1]
        else:
            final_gradients.append((item[0], item[1]))
            final_vjp_fns.append(None)

    guest_exclusion_idxs, guest_scales = nonbonded.generate_exclusion_idxs(
        guest_mol, scale12=1.0, scale13=1.0, scale14=0.5)

    # guest atoms are placed after the host atoms in the combined system
    guest_exclusion_idxs += num_host_atoms

    guest_lj_exclusion_scales = guest_scales
    guest_charge_exclusion_scales = guest_scales

    host_exclusion_idxs = host_exclusions[0]
    host_lj_exclusion_scales = host_exclusions[1]
    host_charge_exclusion_scales = host_exclusions[2]

    combined_exclusion_idxs = np.concatenate(
        [host_exclusion_idxs, guest_exclusion_idxs])
    combined_lj_exclusion_scales = np.concatenate(
        [host_lj_exclusion_scales, guest_lj_exclusion_scales])
    combined_charge_exclusion_scales = np.concatenate(
        [host_charge_exclusion_scales, guest_charge_exclusion_scales])

    torsion_handlers = (bonded.ProperTorsionHandler,
                        bonded.ImproperTorsionHandler)
    # ill defined behavior if more than one charge handler is present: the
    # last one processed wins
    charge_handlers = (nonbonded.SimpleChargeHandler,
                       nonbonded.AM1BCCHandler,
                       nonbonded.AM1CCCHandler)

    for handle in handlers:
        results = handle.parameterize(guest_mol)

        if isinstance(handle, bonded.HarmonicBondHandler):
            bond_idxs, (bond_params, bond_vjp_fn) = results
            bond_idxs += num_host_atoms
            final_gradients.append(("HarmonicBond", (bond_idxs, bond_params)))
            final_vjp_fns.append(bond_vjp_fn)
        elif isinstance(handle, bonded.HarmonicAngleHandler):
            angle_idxs, (angle_params, angle_vjp_fn) = results
            angle_idxs += num_host_atoms
            final_gradients.append(
                ("HarmonicAngle", (angle_idxs, angle_params)))
            final_vjp_fns.append(angle_vjp_fn)
        elif isinstance(handle, torsion_handlers):
            # proper and improper torsions are both emitted as PeriodicTorsion
            torsion_idxs, (torsion_params, torsion_vjp_fn) = results
            torsion_idxs += num_host_atoms
            final_gradients.append(
                ("PeriodicTorsion", (torsion_idxs, torsion_params)))
            final_vjp_fns.append(torsion_vjp_fn)
        elif isinstance(handle, nonbonded.LennardJonesHandler):
            guest_lj_params, guest_lj_vjp_fn = results
            combined_lj_params, combined_lj_vjp_fn = concat_with_vjps(
                host_lj_params, guest_lj_params, None, guest_lj_vjp_fn)
        elif isinstance(handle, charge_handlers):
            guest_charge_params, guest_charge_vjp_fn = results
            combined_charge_params, combined_charge_vjp_fn = concat_with_vjps(
                host_charge_params, guest_charge_params, None,
                guest_charge_vjp_fn)
        elif isinstance(handle, nonbonded.GBSAHandler):
            guest_gb_params, guest_gb_vjp_fn = results
            combined_gb_params, combined_gb_vjp_fn = concat_with_vjps(
                host_gb_params, guest_gb_params, None, guest_gb_vjp_fn)
        else:
            raise Exception("Unknown Handler", handle)

    # WIP
    N_C = num_host_atoms + num_guest_atoms
    N_A = num_host_atoms

    # host atoms always keep plane/offset index 0; only the guest entries
    # (indices N_A and above) depend on the stage
    combined_lambda_plane_idxs = np.zeros(N_C, dtype=np.int32)
    combined_lambda_offset_idxs = np.zeros(N_C, dtype=np.int32)
    if stage == 1:
        combined_lambda_offset_idxs[N_A:] = 1
    elif stage == 2:
        combined_lambda_plane_idxs[N_A:] = 1

    cutoff = 100000.0

    final_gradients.append(
        ('Nonbonded',
         (np.asarray(combined_charge_params), np.asarray(combined_lj_params),
          combined_exclusion_idxs, combined_charge_exclusion_scales,
          combined_lj_exclusion_scales, combined_lambda_plane_idxs,
          combined_lambda_offset_idxs, cutoff)))
    final_vjp_fns.append((combined_charge_vjp_fn, combined_lj_vjp_fn))

    final_gradients.append(
        ('GBSA', (np.asarray(combined_charge_params),
                  np.asarray(combined_gb_params), combined_lambda_plane_idxs,
                  combined_lambda_offset_idxs, *host_gb_props, cutoff,
                  cutoff)))
    final_vjp_fns.append((combined_charge_vjp_fn, combined_gb_vjp_fn))

    host_conf = np.array([
        [to_md_units(x), to_md_units(y), to_md_units(z)]
        for x, y, z in host_pdb.positions
    ])

    conformer = guest_mol.GetConformer(0)
    mol_a_conf = np.array(conformer.GetPositions(), dtype=np.float64)
    mol_a_conf = mol_a_conf / 10  # convert to md_units

    x0 = np.concatenate([host_conf, mol_a_conf])  # combined geometry

    # build restraints using the coordinates; atoms named 'CA' in the host
    # topology are collected as the backbone atoms
    backbone_atoms = []
    for residue in host_pdb.getTopology().residues():
        for a in residue.atoms():
            if a.name == 'CA':
                backbone_atoms.append(a.index)

    final_gradients.append(
        setup_core_restraints(restr_force,
                              restr_alpha,
                              restr_count,
                              x0,
                              num_host_atoms,
                              core_atoms,
                              backbone_atoms,
                              stage=stage))
    final_vjp_fns.append(None)

    combined_masses = np.concatenate([host_masses, guest_masses])

    return x0, combined_masses, final_gradients, final_vjp_fns
def _split_nonbonded(bound_potentials):
    """Separate the single Nonbonded term from the remaining bound potentials."""
    nb_bp = None
    other_bps = []
    for bp in bound_potentials:
        if isinstance(bp, potentials.Nonbonded):
            # (ytz): hack to ensure we only have one nonbonded term
            assert nb_bp is None
            nb_bp = bp
        else:
            other_bps.append(bp)
    return nb_bp, other_bps


def _bind_guest_potentials(env_nb_bp, env_potentials, guest_base_top, ff):
    """Return the environment's potentials plus the guest's bound bonded and nonbonded terms."""
    hgt = topology.HostGuestTopology(env_nb_bp, guest_base_top)

    # setup the parameter handlers for the ligand
    bonded_tuples = [
        [hgt.parameterize_harmonic_bond, ff.hb_handle],
        [hgt.parameterize_harmonic_angle, ff.ha_handle],
        [hgt.parameterize_proper_torsion, ff.pt_handle],
        [hgt.parameterize_improper_torsion, ff.it_handle],
    ]
    combined_bps = list(env_potentials)

    # instantiate the vjps while parameterizing (forward pass)
    for fn, handle in bonded_tuples:
        params, potential = fn(handle.params)
        combined_bps.append(potential.bind(params))
    nb_params, nb_potential = hgt.parameterize_nonbonded(
        ff.q_handle.params, ff.lj_handle.params)
    combined_bps.append(nb_potential.bind(nb_params))
    return combined_bps


def calculate_rigorous_work(
    host_pdbfile, guests_sdfile, outdir, fewer_outfiles=False, no_outfiles=False
):
    """Run a "host" leg and a "water" leg via run_leg for every guest.

    The host is solvated with builders.build_protein_system and a separate
    water box is built whose width is the smallest diagonal entry of the
    host box. Each guest from the sdf file is then combined with the
    solvated host and with the water box in turn, and run_leg is called for
    each combination.

    Parameters
    ----------
    host_pdbfile: path to host pdb file
    guests_sdfile: path to input sdf with guests
    outdir: where to write output (will be created if it does not already exist)
    fewer_outfiles: forwarded unchanged to run_leg
    no_outfiles: if True, the intermediate solvated_host.pdb and
        water_box.pdb files written to outdir are removed again after
        loading; also forwarded unchanged to run_leg

    Output
    ------
    stdout listing the input paths and the module-level lambda/step settings
    stdout with the host box and water box
    stdout for each guest noting how long each leg took to run
    """
    # exist_ok avoids the race between checking for the directory and creating it
    os.makedirs(outdir, exist_ok=True)

    print(
        f"""
    HOST_PDBFILE = {host_pdbfile}
    GUESTS_SDFILE = {guests_sdfile}
    OUTDIR = {outdir}
    INSERTION_MAX_LAMBDA = {INSERTION_MAX_LAMBDA}
    DELETION_MAX_LAMBDA = {DELETION_MAX_LAMBDA}
    MIN_LAMBDA = {MIN_LAMBDA}
    TRANSITION_STEPS = {TRANSITION_STEPS}
    EQ1_STEPS = {EQ1_STEPS}
    EQ2_STEPS = {EQ2_STEPS}
    """
    )

    # Prepare host
    # TODO: handle extra (non-transitioning) guests?

    print("Solvating host...")
    (
        solvated_host_system,
        solvated_host_coords,
        _,
        _,
        host_box,
        solvated_topology,
    ) = builders.build_protein_system(host_pdbfile)

    # sometimes water boxes are sad. Should be minimized first; this is a workaround
    host_box += np.eye(3) * 0.1
    print("host box", host_box)

    # round-trip the solvated host through a PDB file to get an RDKit mol
    solvated_host_pdb = os.path.join(outdir, "solvated_host.pdb")
    writer = pdb_writer.PDBWriter([solvated_topology], solvated_host_pdb)
    writer.write_frame(solvated_host_coords)
    writer.close()
    solvated_host_mol = Chem.MolFromPDBFile(solvated_host_pdb, removeHs=False)
    if no_outfiles:
        os.remove(solvated_host_pdb)

    host_potentials, host_masses = openmm_deserializer.deserialize_system(
        solvated_host_system, cutoff=1.2)
    host_nb_bp, final_host_potentials = _split_nonbonded(host_potentials)

    # Prepare water box
    print("Generating water box...")
    # TODO: water box probably doesn't need to be this big
    box_lengths = host_box[np.diag_indices(3)]
    water_box_width = min(box_lengths)
    (
        water_system,
        orig_water_coords,
        water_box,
        water_topology,
    ) = builders.build_water_system(water_box_width)

    # sometimes water boxes are sad. should be minimized first; this is a workaround
    water_box += np.eye(3) * 0.1
    print("water box", water_box)

    # it's okay if the water box here and the solvated protein box don't align -- they have PBCs
    water_pdb = os.path.join(outdir, "water_box.pdb")
    writer = pdb_writer.PDBWriter([water_topology], water_pdb)
    writer.write_frame(orig_water_coords)
    writer.close()
    water_mol = Chem.MolFromPDBFile(water_pdb, removeHs=False)
    if no_outfiles:
        os.remove(water_pdb)

    water_potentials, water_masses = openmm_deserializer.deserialize_system(
        water_system, cutoff=1.2)
    water_nb_bp, final_water_potentials = _split_nonbonded(water_potentials)

    # the force field path does not depend on the guest, so build it once
    ff_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        "..",
        "ff/params/smirnoff_1_1_0_ccc.py",
    )

    # Run the procedure
    print("Getting guests...")
    suppl = Chem.SDMolSupplier(guests_sdfile, removeHs=False)
    for guest_mol in suppl:
        # the host leg timing includes force field loading and parameterization
        start_time = time.time()
        guest_name = guest_mol.GetProp("_Name")
        guest_conformer = guest_mol.GetConformer(0)
        orig_guest_coords = np.array(guest_conformer.GetPositions(), dtype=np.float64)
        orig_guest_coords = orig_guest_coords / 10  # convert to md_units

        # read the handlers with a context manager so the file is closed promptly
        with open(ff_path) as ff_file:
            guest_ff_handlers = deserialize_handlers(ff_file.read())
        ff = Forcefield(guest_ff_handlers)
        guest_base_top = topology.BaseTopology(guest_mol, ff)
        guest_masses = [a.GetMass() for a in guest_mol.GetAtoms()]

        # combine host & guest
        combined_bps = _bind_guest_potentials(
            host_nb_bp, final_host_potentials, guest_base_top, ff)
        combined_masses = np.concatenate([host_masses, guest_masses])

        run_leg(
            solvated_host_coords,
            orig_guest_coords,
            combined_bps,
            combined_masses,
            host_box,
            guest_name,
            "host",
            solvated_host_mol,
            guest_mol,
            outdir,
            fewer_outfiles,
            no_outfiles,
        )
        end_time = time.time()
        print(
            f"{guest_name} host leg time:", "%.2f" % (end_time - start_time), "seconds"
        )

        # combine water & guest
        combined_bps = _bind_guest_potentials(
            water_nb_bp, final_water_potentials, guest_base_top, ff)
        combined_masses = np.concatenate([water_masses, guest_masses])

        # the water leg timing only covers run_leg itself
        start_time = time.time()
        run_leg(
            orig_water_coords,
            orig_guest_coords,
            combined_bps,
            combined_masses,
            water_box,
            guest_name,
            "water",
            water_mol,
            guest_mol,
            outdir,
            fewer_outfiles,
            no_outfiles,
        )
        end_time = time.time()
        print(
            f"{guest_name} water leg time:", "%.2f" % (end_time - start_time), "seconds"
        )
def create_system(guest_mol, host_pdb, handlers):
    """
    Initialize a self-encompassing System object that we can serialize and simulate.

    The host is parameterized with the 'amber99sbildn.xml' and 'amber99_obc.xml'
    force field files (no cutoff, no constraints, flexible water) and then
    deserialized. The guest is parameterized by ``handlers``. Guest atoms are
    appended after the host atoms, so every guest index is shifted by the
    number of host atoms.

    Parameters
    ----------
    guest_mol: rdkit.ROMol
        guest molecule; conformer 0 supplies its coordinates

    host_pdb: openmm.PDBFile
        host system from OpenMM

    handlers: list of timemachine.ops.Gradients
        forcefield handlers used to parameterize the small molecule. They must
        include a Lennard-Jones handler, a charge handler and a GBSA handler.

    Returns
    -------
    3-tuple
        x0, combined_masses, final_gradients

    Raises
    ------
    Exception
        If a handler of an unrecognized type is supplied.
    ValueError
        If the handlers do not yield Lennard-Jones, charge and GBSA parameters.

    Note
    ----
    The guest is rotated about its centroid by an unseeded random rotation,
    so x0 differs from call to call.

    """
    from scipy.stats import special_ortho_group

    guest_masses = np.array([a.GetMass() for a in guest_mol.GetAtoms()], dtype=np.float64)

    amber_ff = app.ForceField('amber99sbildn.xml', 'amber99_obc.xml')
    host_system = amber_ff.createSystem(
        host_pdb.topology,
        nonbondedMethod=app.NoCutoff,
        constraints=None,
        rigidWater=False)
    host_fns, host_masses = openmm_deserializer.deserialize_system(host_system)

    num_host_atoms = len(host_masses)
    num_guest_atoms = guest_mol.GetNumAtoms()

    # Name, Args, vjp_fn
    final_gradients = []

    # Nonbonded-type host terms are held back so they can be merged with the
    # guest's; everything else is passed through unchanged.
    for item in host_fns:
        if item[0] == 'LennardJones':
            host_lj_params = item[1]
        elif item[0] == 'Charges':
            host_charge_params = item[1]
        elif item[0] == 'GBSA':
            host_gb_params = item[1][0]
            host_gb_props = item[1][1:]
        elif item[0] == 'Exclusions':
            host_exclusions = item[1]
        else:
            final_gradients.append((item[0], item[1]))

    guest_exclusion_idxs, guest_scales = nonbonded.generate_exclusion_idxs(
        guest_mol,
        scale12=1.0,
        scale13=1.0,
        scale14=0.5)
    guest_exclusion_idxs += num_host_atoms

    # host_exclusions is (idxs, lj scales, charge scales); the guest uses the
    # same scale array for both LJ and charge exclusions.
    combined_exclusion_idxs = np.concatenate(
        [host_exclusions[0], guest_exclusion_idxs])
    combined_lj_exclusion_scales = np.concatenate(
        [host_exclusions[1], guest_scales])
    combined_charge_exclusion_scales = np.concatenate(
        [host_exclusions[2], guest_scales])

    torsion_handlers = (bonded.ProperTorsionHandler, bonded.ImproperTorsionHandler)
    charge_handlers = (
        nonbonded.SimpleChargeHandler,
        nonbonded.AM1BCCHandler,
        nonbonded.AM1CCCHandler,
    )

    combined_lj_params = None
    combined_charge_params = None
    combined_gb_params = None

    for handle in handlers:
        results = handle.parameterize(guest_mol)

        if isinstance(handle, bonded.HarmonicBondHandler):
            bond_idxs, (bond_params, _) = results
            bond_idxs += num_host_atoms
            final_gradients.append(("HarmonicBond", (bond_idxs, bond_params)))
        elif isinstance(handle, bonded.HarmonicAngleHandler):
            angle_idxs, (angle_params, _) = results
            angle_idxs += num_host_atoms
            final_gradients.append(("HarmonicAngle", (angle_idxs, angle_params)))
        elif isinstance(handle, torsion_handlers):
            # proper and improper torsions share the same functional form
            torsion_idxs, (torsion_params, _) = results
            torsion_idxs += num_host_atoms
            final_gradients.append(("PeriodicTorsion", (torsion_idxs, torsion_params)))
        elif isinstance(handle, nonbonded.LennardJonesHandler):
            guest_lj_params, _ = results
            combined_lj_params = np.concatenate([host_lj_params, guest_lj_params])
        elif isinstance(handle, charge_handlers):
            guest_charge_params, _ = results
            combined_charge_params = np.concatenate(
                [host_charge_params, guest_charge_params])
        elif isinstance(handle, nonbonded.GBSAHandler):
            guest_gb_params, _ = results
            combined_gb_params = np.concatenate([host_gb_params, guest_gb_params])
        else:
            raise Exception("Unknown Handler", handle)

    # Fail here with a clear message instead of an UnboundLocalError below.
    missing = [
        label
        for label, value in (
            ("Lennard-Jones", combined_lj_params),
            ("charge", combined_charge_params),
            ("GBSA", combined_gb_params),
        )
        if value is None
    ]
    if missing:
        raise ValueError(
            "handlers did not provide parameters for: " + ", ".join(missing))

    host_conf = np.array([
        [to_md_units(x), to_md_units(y), to_md_units(z)]
        for x, y, z in host_pdb.positions
    ])

    conformer = guest_mol.GetConformer(0)
    mol_a_conf = np.array(conformer.GetPositions(), dtype=np.float64)
    mol_a_conf = mol_a_conf / 10  # convert to md_units

    # randomly rotate the guest about its own centroid
    center = np.mean(mol_a_conf, axis=0)
    mol_a_conf -= center
    mol_a_conf = np.matmul(mol_a_conf, special_ortho_group.rvs(3))
    mol_a_conf += center

    x0 = np.concatenate([host_conf, mol_a_conf])  # combined geometry

    num_atoms = num_host_atoms + num_guest_atoms

    cutoff = 100000.0

    # only the guest atoms get a non-zero lambda offset index
    combined_lambda_plane_idxs = np.zeros(num_atoms, dtype=np.int32)
    combined_lambda_offset_idxs = np.zeros(num_atoms, dtype=np.int32)
    combined_lambda_offset_idxs[num_host_atoms:] = 1

    final_gradients.append((
        'Nonbonded', (
            np.asarray(combined_charge_params),
            np.asarray(combined_lj_params),
            combined_exclusion_idxs,
            combined_charge_exclusion_scales,
            combined_lj_exclusion_scales,
            combined_lambda_plane_idxs,
            combined_lambda_offset_idxs,
            cutoff)))

    final_gradients.append((
        'GBSA', (
            np.asarray(combined_charge_params),
            np.asarray(combined_gb_params),
            combined_lambda_plane_idxs,
            combined_lambda_offset_idxs,
            *host_gb_props,
            cutoff,
            cutoff)))

    combined_masses = np.concatenate([host_masses, guest_masses])

    return x0, combined_masses, final_gradients
def minimize_host_4d(romol, host_system, host_coords, ff, box):
    """
    Insert romol into a host system via 4D decoupling under a Langevin thermostat.

    The ligand is kept (nearly) rigid by scaling its atomic masses by 100000,
    so effectively only the host coordinates are minimized. Lambda is stepped
    linearly from 1.0 down to 0 over 1000 integrator steps.

    Parameters
    ----------
    romol: ROMol
        Ligand to be inserted. It must be embedded.

    host_system: openmm.System
        OpenMM System representing the host. It must deserialize to exactly
        one Nonbonded potential.

    host_coords: np.ndarray
        N x 3 coordinates of the host. units of nanometers.

    ff: ff.Forcefield
        Wrapper class around a list of handlers

    box: np.ndarray [3,3]
        Box matrix for periodic boundary conditions. units of nanometers.

    Returns
    -------
    np.ndarray
        This returns minimized host_coords (the first N rows of the final
        combined coordinates).

    Raises
    ------
    ValueError
        If the host does not deserialize to exactly one Nonbonded potential.

    """
    host_bps, host_masses = openmm_deserializer.deserialize_system(host_system, cutoff=1.2)

    # keep the ligand rigid
    ligand_masses = [a.GetMass() * 100000 for a in romol.GetAtoms()]
    combined_masses = np.concatenate([host_masses, ligand_masses])

    ligand_coords = get_romol_conf(romol)
    combined_coords = np.concatenate([host_coords, ligand_coords])
    num_host_atoms = host_coords.shape[0]

    # The host's nonbonded term is held back so it can be combined with the
    # ligand; every other host potential is used as-is.
    host_p = None
    final_potentials = []
    for bp in host_bps:
        if isinstance(bp, potentials.Nonbonded):
            if host_p is not None:
                raise ValueError(
                    "host_system deserialized to more than one Nonbonded potential")
            host_p = bp
        else:
            final_potentials.append(bp)

    if host_p is None:
        raise ValueError("host_system has no Nonbonded potential")

    gbt = topology.BaseTopology(romol, ff)
    hgt = topology.HostGuestTopology(host_p, gbt)

    # setup the parameter handlers for the ligand
    tuples = [
        [hgt.parameterize_harmonic_bond, [ff.hb_handle]],
        [hgt.parameterize_harmonic_angle, [ff.ha_handle]],
        [hgt.parameterize_proper_torsion, [ff.pt_handle]],
        [hgt.parameterize_improper_torsion, [ff.it_handle]],
        [hgt.parameterize_nonbonded, [ff.q_handle, ff.lj_handle]],
    ]

    for fn, handles in tuples:
        params, potential = fn(*[h.params for h in handles])
        final_potentials.append(potential.bind(params))

    seed = 2020

    # NOTE(review): positional arguments are presumably temperature, timestep
    # and friction -- confirm against LangevinIntegrator.
    intg = LangevinIntegrator(
        300.0,
        1.5e-3,
        1.0,
        combined_masses,
        seed
    ).impl()

    x0 = combined_coords
    v0 = np.zeros_like(x0)

    u_impls = [bp.bound_impl(precision=np.float32) for bp in final_potentials]

    # context components: positions, velocities, box, integrator, energy fxns
    ctxt = custom_ops.Context(
        x0,
        v0,
        box,
        intg,
        u_impls
    )

    for lamb in np.linspace(1.0, 0, 1000):
        ctxt.step(lamb)

    return ctxt.get_x_t()[:num_host_atoms]
def combine_potentials(ff_handlers, guest_mol, host_system, precision):
    """
    This function is responsible for figuring out how to take two separate
    hamiltonians and combining them into one sensible alchemical system.

    Host potentials other than the nonbonded term are passed through with an
    empty vjp list. Guest bonded terms are appended with their indices shifted
    by the number of host atoms. The host and guest nonbonded terms are merged
    into a single Nonbonded potential, appended last.

    Parameters
    ----------
    ff_handlers: list of forcefield handlers
        Small molecule forcefield handlers. They must include an AM1CCCHandler
        and a LennardJonesHandler; handlers of unrecognized type are skipped
        with a printed warning.

    guest_mol: Chem.ROMol
        RDKit molecule

    host_system: openmm.System
        Host system to be deserialized. It must deserialize to exactly one
        Nonbonded potential.

    precision: np.float32 or np.float64
        Numerical precision of the functional form

    Returns
    -------
    tuple
        Returns a list of lib.potentials objects, combined masses, and a list of
        their corresponding vjp_fns back into the forcefield

    Raises
    ------
    ValueError
        If the host does not have exactly one Nonbonded potential, or if the
        charge or Lennard-Jones handler is missing.

    """
    # one cutoff shared by the host deserialization and the combined potential
    nonbonded_cutoff = 1.0

    host_potentials, host_masses = openmm_deserializer.deserialize_system(
        host_system,
        precision,
        cutoff=nonbonded_cutoff)

    host_nb_bp = None
    combined_potentials = []
    combined_vjp_fns = []

    for bp in host_potentials:
        if isinstance(bp, potentials.Nonbonded):
            # (ytz): hack to ensure we only have one nonbonded term
            if host_nb_bp is not None:
                raise ValueError(
                    "host_system deserialized to more than one Nonbonded potential")
            host_nb_bp = bp
        else:
            combined_potentials.append(bp)
            # host terms have no vjp back into the small molecule forcefield
            combined_vjp_fns.append([])

    if host_nb_bp is None:
        raise ValueError("host_system has no Nonbonded potential")

    guest_masses = np.array([a.GetMass() for a in guest_mol.GetAtoms()], dtype=np.float64)

    num_host_atoms = len(host_masses)

    combined_masses = np.concatenate([host_masses, guest_masses])

    torsion_handlers = (bonded.ProperTorsionHandler, bonded.ImproperTorsionHandler)
    charge_handle = None
    lj_handle = None

    for handle in ff_handlers:
        results = handle.parameterize(guest_mol)

        if isinstance(handle, bonded.HarmonicBondHandler):
            bond_idxs, (bond_params, vjp_fn) = results
            bond_idxs += num_host_atoms
            combined_potentials.append(
                potentials.HarmonicBond(bond_idxs, precision=precision).bind(bond_params))
            combined_vjp_fns.append([(handle, vjp_fn)])
        elif isinstance(handle, bonded.HarmonicAngleHandler):
            angle_idxs, (angle_params, vjp_fn) = results
            angle_idxs += num_host_atoms
            combined_potentials.append(
                potentials.HarmonicAngle(angle_idxs, precision=precision).bind(angle_params))
            combined_vjp_fns.append([(handle, vjp_fn)])
        elif isinstance(handle, torsion_handlers):
            # proper and improper torsions share the PeriodicTorsion form
            torsion_idxs, (torsion_params, vjp_fn) = results
            torsion_idxs += num_host_atoms
            combined_potentials.append(
                potentials.PeriodicTorsion(torsion_idxs, precision=precision).bind(torsion_params))
            combined_vjp_fns.append([(handle, vjp_fn)])
        elif isinstance(handle, nonbonded.AM1CCCHandler):
            charge_handle = handle
            guest_charge_params, guest_charge_vjp_fn = results
        elif isinstance(handle, nonbonded.LennardJonesHandler):
            guest_lj_params, guest_lj_vjp_fn = results
            lj_handle = handle
        else:
            print("Warning: skipping handler", handle)

    # Fail here with a clear message instead of an UnboundLocalError below.
    if charge_handle is None or lj_handle is None:
        raise ValueError(
            "ff_handlers must contain an AM1CCCHandler and a LennardJonesHandler")

    # process nonbonded terms
    combined_nb_params, (charge_vjp_fn, lj_vjp_fn) = nonbonded_vjps(
        guest_charge_params, guest_charge_vjp_fn,
        guest_lj_params, guest_lj_vjp_fn,
        host_nb_bp.params)

    # these vjp_fns take in adjoints of combined_params and returns derivatives
    # appropriate to the underlying handler
    combined_vjp_fns.append([(charge_handle, charge_vjp_fn), (lj_handle, lj_vjp_fn)])

    # tbd change scale 14 for electrostatics
    guest_exclusion_idxs, guest_scale_factors = nonbonded.generate_exclusion_idxs(
        guest_mol,
        scale12=1.0,
        scale13=1.0,
        scale14=0.5)

    # allow the ligand to be alchemically decoupled
    # a value of one indicates that we allow the atom to be adjusted by the lambda value
    guest_lambda_offset_idxs = np.ones(len(guest_masses), dtype=np.int32)

    # use same scale factors until we modify 1-4s for electrostatics
    guest_scale_factors = np.stack([guest_scale_factors, guest_scale_factors], axis=1)

    combined_lambda_offset_idxs = np.concatenate(
        [host_nb_bp.get_lambda_offset_idxs(), guest_lambda_offset_idxs])
    combined_exclusion_idxs = np.concatenate(
        [host_nb_bp.get_exclusion_idxs(), guest_exclusion_idxs + num_host_atoms])
    combined_scales = np.concatenate(
        [host_nb_bp.get_scale_factors(), guest_scale_factors])
    combined_beta = 2.0

    combined_potentials.append(
        potentials.Nonbonded(
            combined_exclusion_idxs,
            combined_scales,
            combined_lambda_offset_idxs,
            combined_beta,
            nonbonded_cutoff,
            precision=precision).bind(combined_nb_params))

    return combined_potentials, combined_masses, combined_vjp_fns
def host_edge(self, lamb, host_system, host_coords, box, equil_steps=10000, prod_steps=100000):
    """
    Run equilibrium decoupling simulation at a given value of lambda in a host environment.

    The ligand masses and coordinates used for the simulation are the mean of
    the two end states returned by ``self.top.interpolate_params`` for
    ``self.mol_a`` and ``self.mol_b``.

    Parameters
    ----------
    lamb: float [0, 1]
        0 is the fully interacting system, and 1 is the non-interacting system

    host_system: openmm.System
        OpenMM System object to be deserialized. The host can be simply a box of water, or a fully
        solvated protein. It must deserialize to exactly one Nonbonded potential.

    host_coords: np.array of shape [..., 3]
        Host coordinates, in nanometers. It should be properly minimized and not have clashes
        with the ligand coordinates.

    box: np.array [3,3]
        Periodic boundary conditions, in nanometers.

    equil_steps: int
        Number of steps to run equilibration. Statistics are not gathered.

    prod_steps: int
        Number of steps to run production. Statistics are gathered.

    Returns
    -------
    3-tuple
        bonded_us, nonbonded_us, grads_and_handles. The first two come straight
        from ``self._simulate``. grads_and_handles is a list of
        (gradient, handler type) pairs, one per ligand forcefield handler.

    Raises
    ------
    ValueError
        If the host does not deserialize to exactly one Nonbonded potential.

    """
    ligand_masses_a = [a.GetMass() for a in self.mol_a.GetAtoms()]
    ligand_masses_b = [b.GetMass() for b in self.mol_b.GetAtoms()]

    # extract the 0th conformer
    ligand_coords_a = get_romol_conf(self.mol_a)
    ligand_coords_b = get_romol_conf(self.mol_b)

    host_bps, host_masses = openmm_deserializer.deserialize_system(host_system, cutoff=1.2)

    final_potentials = []
    final_vjp_and_handles = []

    # keep the bonded terms in the host the same.
    # but we keep the nonbonded term for a subsequent modification
    host_p = None
    for bp in host_bps:
        if isinstance(bp, potentials.Nonbonded):
            if host_p is not None:
                raise ValueError(
                    "host_system deserialized to more than one Nonbonded potential")
            host_p = bp
        else:
            final_potentials.append([bp])
            # (ytz): no protein ff support for now, so we skip their vjps
            final_vjp_and_handles.append(None)

    if host_p is None:
        raise ValueError("host_system has no Nonbonded potential")

    hgt = topology.HostGuestTopology(host_p, self.top)

    # setup the parameter handlers for the ligand
    bonded_tuples = [
        [hgt.parameterize_harmonic_bond, self.ff.hb_handle],
        [hgt.parameterize_harmonic_angle, self.ff.ha_handle],
        [hgt.parameterize_proper_torsion, self.ff.pt_handle],
        [hgt.parameterize_improper_torsion, self.ff.it_handle]
    ]

    # instantiate the vjps while parameterizing (forward pass)
    for fn, handle in bonded_tuples:
        (src_params, dst_params, uni_params), vjp_fn, (src_potential, dst_potential, uni_potential) = jax.vjp(
            fn, handle.params, has_aux=True)
        final_potentials.append([
            src_potential.bind(src_params),
            dst_potential.bind(dst_params),
            uni_potential.bind(uni_params)])
        final_vjp_and_handles.append((vjp_fn, handle))

    nb_params, vjp_fn, nb_potential = jax.vjp(
        hgt.parameterize_nonbonded,
        self.ff.q_handle.params,
        self.ff.lj_handle.params,
        has_aux=True)
    final_potentials.append([nb_potential.bind(nb_params)])
    # (ytz): note the handlers are a tuple, this is checked later
    final_vjp_and_handles.append([vjp_fn, (self.ff.q_handle, self.ff.lj_handle)])

    interpolated_masses = self.top.interpolate_params(ligand_masses_a, ligand_masses_b)
    combined_masses = np.concatenate([host_masses, np.mean(interpolated_masses, axis=0)])

    interpolated_coords = self.top.interpolate_params(ligand_coords_a, ligand_coords_b)
    combined_coords = np.concatenate([host_coords, np.mean(interpolated_coords, axis=0)])

    # (ytz): us is short form for mean and std dev.
    bonded_us, nonbonded_us, grads = self._simulate(
        lamb,
        box,
        combined_coords,
        np.zeros_like(combined_coords),
        final_potentials,
        self._get_integrator(combined_masses),
        equil_steps,
        prod_steps
    )

    grads_and_handles = []

    for du_dqs, vjps_and_handles in zip(grads, final_vjp_and_handles):
        if vjps_and_handles is None:
            # host term: nothing to propagate back
            continue

        vjp_fn = vjps_and_handles[0]
        handles = vjps_and_handles[1]

        # we need to get the shapes correct (eg. nonbonded vjp emits an ndarray, not a list.)
        # (ytz): so far nonbonded grads is the only term that map back out to two
        # vjp handlers (charge and lj). the vjp also expects an nd.array, not a list. So we kill
        # two birds with one stone here, but this is quite brittle and should be refactored later on.
        if type(handles) is tuple:
            # handle nonbonded terms
            du_dps = vjp_fn(du_dqs[0])
            for du_dp, handler in zip(du_dps, handles):
                grads_and_handles.append((du_dp, type(handler)))
        else:
            du_dp = vjp_fn(du_dqs)
            # bonded terms return a list, so we need to flatten it here
            grads_and_handles.append((du_dp[0], type(handles)))

    return bonded_us, nonbonded_us, grads_and_handles
def host_edge(self, lamb, host_system, host_coords, box, equil_steps=10000, prod_steps=100000):
    """
    Run equilibrium decoupling simulation at a given value of lambda in a host environment.

    Parameters
    ----------
    lamb: float [0, 1]
        0 is the fully interacting system, and 1 is the non-interacting system

    host_system: openmm.System
        OpenMM System object to be deserialized. The host can be simply a box of water, or a fully
        solvated protein. It must deserialize to exactly one Nonbonded potential.

    host_coords: np.array of shape [..., 3]
        Host coordinates, in nanometers. It should be properly minimized and not have clashes
        with the ligand coordinates.

    box: np.array [3,3]
        Periodic boundary conditions, in nanometers.

    equil_steps: int
        Number of steps to run equilibration. Statistics are not gathered.

    prod_steps: int
        Number of steps to run production. Statistics are gathered.

    Returns
    -------
    The return value of ``self._simulate``, unchanged.
    NOTE(review): previously documented as a (float, float) pair of average
    du_dl values for bonded and nonbonded terms -- confirm against _simulate.

    Raises
    ------
    ValueError
        If the host does not deserialize to exactly one Nonbonded potential.

    """
    ligand_masses = [a.GetMass() for a in self.mol.GetAtoms()]
    ligand_coords = get_romol_conf(self.mol)

    host_bps, host_masses = openmm_deserializer.deserialize_system(host_system, cutoff=1.2)

    # Each entry of final_potentials is a list of bound potentials. The host's
    # nonbonded term is held back so it can be combined with the ligand.
    host_p = None
    final_potentials = []
    for bp in host_bps:
        if isinstance(bp, potentials.Nonbonded):
            if host_p is not None:
                raise ValueError(
                    "host_system deserialized to more than one Nonbonded potential")
            host_p = bp
        else:
            final_potentials.append([bp])

    if host_p is None:
        raise ValueError("host_system has no Nonbonded potential")

    hgt = topology.HostGuestTopology(host_p, self.top)

    # setup the parameter handlers for the ligand
    bonded_tuples = [
        [hgt.parameterize_harmonic_bond, self.ff.hb_handle],
        [hgt.parameterize_harmonic_angle, self.ff.ha_handle],
        [hgt.parameterize_proper_torsion, self.ff.pt_handle],
        [hgt.parameterize_improper_torsion, self.ff.it_handle]
    ]

    # The vjp functions are not used by this method; jax.vjp is kept so the
    # forward pass is evaluated exactly as before.
    for fn, handle in bonded_tuples:
        params, _vjp_fn, potential = jax.vjp(fn, handle.params, has_aux=True)
        final_potentials.append([potential.bind(params)])

    nb_params, _vjp_fn, nb_potential = jax.vjp(
        hgt.parameterize_nonbonded,
        self.ff.q_handle.params,
        self.ff.lj_handle.params,
        has_aux=True)
    final_potentials.append([nb_potential.bind(nb_params)])

    combined_masses = np.concatenate([host_masses, ligand_masses])
    combined_coords = np.concatenate([host_coords, ligand_coords])

    return self._simulate(
        lamb,
        box,
        combined_coords,
        np.zeros_like(combined_coords),
        final_potentials,
        self._get_integrator(combined_masses),
        equil_steps,
        prod_steps
    )
def create_system(guest_mol, host_pdb, handlers, restr_search_radius, restr_force_constant, intg_temperature, stage):
    """
    Initialize a self-encompassing System object that we can serialize and simulate.

    The host is parameterized with the 'amber99sbildn.xml' and 'amber99_obc.xml'
    force field files (no cutoff, no constraints, flexible water) and then
    deserialized. The guest is parameterized by ``handlers``. Guest atoms are
    appended after the host atoms, so every guest index is shifted by the
    number of host atoms. A centroid restraint between the ligand and the
    pocket atoms is appended as the last term.

    Parameters
    ----------
    guest_mol: rdkit.ROMol
        guest molecule; conformer 0 supplies its coordinates

    host_pdb: openmm.PDBFile
        host system from OpenMM

    handlers: list of timemachine.ops.Gradients
        forcefield handlers used to parameterize the system. They must include
        a Lennard-Jones handler, a charge handler and a GBSA handler.

    restr_search_radius: float
        how far away we search from the ligand to define the binding pocket atoms.

    restr_force_constant: float
        strength of the harmonic oscillator for the restraint

    intg_temperature: float
        temperature of the integrator in Kelvin

    stage: int (0 or 1)
        a free energy specific variable that determines how we decouple.
        In stage 0 no atom gets a lambda offset and the restraint is passed
        lamb_flag=1, lamb_offset=0. In stage 1 the guest atoms get a lambda
        offset of 1 and the restraint is passed lamb_flag=0, lamb_offset=1.

    Returns
    -------
    5-tuple
        x0, combined_masses, ssc, final_gradients, handler_vjp_fns.
        ssc is the value returned by standard_state.harmonic_com_ssc
        (presumably the standard state correction -- confirm against that
        helper). handler_vjp_fns maps each handler to its vjp function.

    Raises
    ------
    AssertionError
        If stage is not 0 or 1.
    Exception
        If a handler of an unrecognized type is supplied.
    ValueError
        If the handlers do not yield Lennard-Jones, charge and GBSA parameters.

    """
    # Validate before the expensive parameterization. AssertionError is raised
    # explicitly (not via `assert`) so the check survives `python -O` while
    # keeping the exception type callers previously saw.
    if stage not in (0, 1):
        raise AssertionError(f"stage must be 0 or 1, got {stage!r}")

    guest_masses = np.array([a.GetMass() for a in guest_mol.GetAtoms()], dtype=np.float64)

    amber_ff = app.ForceField('amber99sbildn.xml', 'amber99_obc.xml')
    host_system = amber_ff.createSystem(
        host_pdb.topology,
        nonbondedMethod=app.NoCutoff,
        constraints=None,
        rigidWater=False)
    host_fns, host_masses = openmm_deserializer.deserialize_system(host_system)

    num_host_atoms = len(host_masses)
    num_guest_atoms = guest_mol.GetNumAtoms()

    # Name, Args, vjp_fn
    final_gradients = []

    # Nonbonded-type host terms are held back so they can be merged with the
    # guest's; everything else is passed through unchanged.
    for item in host_fns:
        if item[0] == 'LennardJones':
            host_lj_params = item[1]
        elif item[0] == 'Charges':
            host_charge_params = item[1]
        elif item[0] == 'GBSA':
            host_gb_params = item[1][0]
            host_gb_props = item[1][1:]
        elif item[0] == 'Exclusions':
            host_exclusions = item[1]
        else:
            final_gradients.append((item[0], item[1]))

    guest_exclusion_idxs, guest_scales = nonbonded.generate_exclusion_idxs(
        guest_mol,
        scale12=1.0,
        scale13=1.0,
        scale14=0.5)
    guest_exclusion_idxs += num_host_atoms

    # host_exclusions is (idxs, lj scales, charge scales); the guest uses the
    # same scale array for both LJ and charge exclusions.
    combined_exclusion_idxs = np.concatenate(
        [host_exclusions[0], guest_exclusion_idxs])
    combined_lj_exclusion_scales = np.concatenate(
        [host_exclusions[1], guest_scales])
    combined_charge_exclusion_scales = np.concatenate(
        [host_exclusions[2], guest_scales])

    torsion_handlers = (bonded.ProperTorsionHandler, bonded.ImproperTorsionHandler)
    charge_handlers = (
        nonbonded.SimpleChargeHandler,
        nonbonded.AM1BCCHandler,
        nonbonded.AM1CCCHandler,
    )

    combined_lj_params = None
    combined_charge_params = None
    combined_gb_params = None

    # We build up a map of handles to a corresponding vjp_fn that takes in adjoints of output parameters
    # for nonbonded terms, the vjp_fn has been modified to take in combined parameters
    handler_vjp_fns = {}

    for handle in handlers:
        results = handle.parameterize(guest_mol)

        if isinstance(handle, bonded.HarmonicBondHandler):
            bond_idxs, (bond_params, handler_vjp_fn) = results
            bond_idxs += num_host_atoms
            final_gradients.append(("HarmonicBond", (bond_idxs, bond_params)))
        elif isinstance(handle, bonded.HarmonicAngleHandler):
            angle_idxs, (angle_params, handler_vjp_fn) = results
            angle_idxs += num_host_atoms
            final_gradients.append(("HarmonicAngle", (angle_idxs, angle_params)))
        elif isinstance(handle, torsion_handlers):
            # proper and improper torsions share the same functional form
            torsion_idxs, (torsion_params, handler_vjp_fn) = results
            torsion_idxs += num_host_atoms
            final_gradients.append(("PeriodicTorsion", (torsion_idxs, torsion_params)))
        elif isinstance(handle, nonbonded.LennardJonesHandler):
            guest_lj_params, guest_lj_vjp_fn = results
            combined_lj_params, handler_vjp_fn = concat_with_vjps(
                host_lj_params, guest_lj_params, None, guest_lj_vjp_fn)
        elif isinstance(handle, charge_handlers):
            guest_charge_params, guest_charge_vjp_fn = results
            combined_charge_params, handler_vjp_fn = concat_with_vjps(
                host_charge_params, guest_charge_params, None, guest_charge_vjp_fn)
        elif isinstance(handle, nonbonded.GBSAHandler):
            guest_gb_params, guest_gb_vjp_fn = results
            combined_gb_params, handler_vjp_fn = concat_with_vjps(
                host_gb_params, guest_gb_params, None, guest_gb_vjp_fn)
        else:
            raise Exception("Unknown Handler", handle)

        handler_vjp_fns[handle] = handler_vjp_fn

    # Fail here with a clear message instead of an UnboundLocalError below.
    missing = [
        label
        for label, value in (
            ("Lennard-Jones", combined_lj_params),
            ("charge", combined_charge_params),
            ("GBSA", combined_gb_params),
        )
        if value is None
    ]
    if missing:
        raise ValueError(
            "handlers did not provide parameters for: " + ", ".join(missing))

    host_conf = np.array([
        [to_md_units(x), to_md_units(y), to_md_units(z)]
        for x, y, z in host_pdb.positions
    ])

    conformer = guest_mol.GetConformer(0)
    mol_a_conf = np.array(conformer.GetPositions(), dtype=np.float64)
    mol_a_conf = mol_a_conf / 10  # convert to md_units

    x0 = np.concatenate([host_conf, mol_a_conf])  # combined geometry

    pocket_atoms = find_protein_pocket_atoms(x0, num_host_atoms, restr_search_radius)

    num_atoms = num_host_atoms + num_guest_atoms

    cutoff = 100000.0

    combined_lambda_plane_idxs = np.zeros(num_atoms, dtype=np.int32)
    combined_lambda_offset_idxs = np.zeros(num_atoms, dtype=np.int32)

    if stage == 0:
        # restraint settings for stage 0; no atom gets a lambda offset
        lamb_flag = 1
        lamb_offset = 0
    else:
        # stage 1: guest atoms get a lambda offset
        combined_lambda_offset_idxs[num_host_atoms:] = 1
        lamb_flag = 0
        lamb_offset = 1

    final_gradients.append((
        'Nonbonded', (
            np.asarray(combined_charge_params),
            np.asarray(combined_lj_params),
            combined_exclusion_idxs,
            combined_charge_exclusion_scales,
            combined_lj_exclusion_scales,
            combined_lambda_plane_idxs,
            combined_lambda_offset_idxs,
            cutoff)))

    final_gradients.append((
        'GBSA', (
            np.asarray(combined_charge_params),
            np.asarray(combined_gb_params),
            combined_lambda_plane_idxs,
            combined_lambda_offset_idxs,
            *host_gb_props,
            cutoff,
            cutoff)))

    ligand_idxs = np.arange(num_host_atoms, num_atoms, dtype=np.int32)

    # unweighted center of mass restraints
    avg_xi = np.mean(x0[ligand_idxs], axis=0)
    avg_xj = np.mean(x0[pocket_atoms], axis=0)
    ctr_dij = np.sqrt(np.sum((avg_xi - avg_xj)**2))

    combined_masses = np.concatenate([host_masses, guest_masses])

    # restraints
    final_gradients.append((
        'CentroidRestraint', (
            ligand_idxs,
            pocket_atoms,
            combined_masses,
            restr_force_constant,
            ctr_dij,
            lamb_flag,
            lamb_offset)))

    ssc = standard_state.harmonic_com_ssc(restr_force_constant, ctr_dij, intg_temperature)

    return x0, combined_masses, ssc, final_gradients, handler_vjp_fns