def test_extractPositionsFromOEMol(molecule=smiles_to_oemol('CC')): """ Generates an ethane OEMol from string and checks it returns positions of correct length and units Paramters ---------- smiles : str, default 'CC' default is ethane molecule Returns ------- positions : np.array openmm positions of molecule with units """ from perses.utils.openeye import extractPositionsFromOEMol import simtk.unit as unit positions = extractPositionsFromOEMol(molecule) assert (len(positions) == molecule.NumAtoms() ), "Positions extracted from OEMol does not match number of atoms" assert (positions.unit == unit.angstrom ), "Positions returned are not in expected units of angstrom" return positions
def createSystemFromIUPAC(iupac_name): """ Create an openmm system out of an oemol Parameters ---------- iupac_name : str IUPAC name Returns ------- molecule : openeye.OEMol OEMol molecule system : openmm.System object OpenMM system positions : [n,3] np.array of floats Positions topology : openmm.app.Topology object Topology """ from perses.utils.data import get_data_filename from perses.utils.openeye import extractPositionsFromOEMol # Create OEMol molecule = iupac_to_oemol(iupac_name) # Generate a topology. from openmoltools.forcefield_generators import generateTopologyFromOEMol topology = generateTopologyFromOEMol(molecule) # Initialize a forcefield with GAFF. # TODO: Fix path for `gaff.xml` since it is not yet distributed with OpenMM from simtk.openmm.app import ForceField gaff_xml_filename = get_data_filename('data/gaff.xml') forcefield = ForceField(gaff_xml_filename) # Generate template and parameters. from openmoltools.forcefield_generators import generateResidueTemplate [template, ffxml] = generateResidueTemplate(molecule) # Register the template. forcefield.registerResidueTemplate(template) # Add the parameters. forcefield.loadFile(StringIO(ffxml)) # Create the system. system = forcefield.createSystem(topology, removeCMMotion=False) # Extract positions positions = extractPositionsFromOEMol(molecule) return (molecule, system, positions, topology)
def generate_ligand_topologies_and_positions(ligand_filename): """ Generate the topologies and positions for ligand-only systems Parameters ---------- ligand_filename : str The name of the file containing the ligands in any OpenEye supported format Returns ------- ligand_topologies : dict of str: md.Topology A dictionary of the ligand topologies generated from the file indexed by SMILES strings ligand_positions_dict : dict of str: unit.Quantity array A dictionary of the corresponding positions, indexed by SMILES strings """ ifs = oechem.oemolistream() ifs.open(ligand_filename) # get the list of molecules mol_list = [oechem.OEMol(mol) for mol in ifs.GetOEMols()] for idx, mol in enumerate(mol_list): mol.SetTitle("MOL{}".format(idx)) oechem.OETriposAtomNames(mol) mol_dict = {oechem.OEMolToSmiles(mol): mol for mol in mol_list} ligand_topology_dict = { smiles: forcefield_generators.generateTopologyFromOEMol(mol) for smiles, mol in mol_dict.items() } ligand_topologies = {} ligand_positions_dict = {} for smiles, ligand_topology in ligand_topology_dict.items(): ligand_md_topology = md.Topology.from_openmm(ligand_topology) ligand_topologies[smiles] = ligand_md_topology ligand_positions = extractPositionsFromOEMol(mol_dict[smiles]) ligand_positions_dict[smiles] = ligand_positions return ligand_topologies, ligand_positions_dict
def __init__(self, protein_filename, mutation_chain_id, mutation_residue_id, proposed_residue, phase='complex', conduct_endstate_validation=True, ligand_input=None, ligand_index=0, water_model='tip3p', ionic_strength=0.15 * unit.molar, forcefield_files=['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml'], barostat=openmm.MonteCarloBarostat(1.0 * unit.atmosphere, temperature, 50), forcefield_kwargs={'removeCMMotion': False, 'ewaldErrorTolerance': 0.00025, 'constraints' : app.HBonds, 'hydrogenMass' : 4 * unit.amus}, periodic_forcefield_kwargs={'nonbondedMethod': app.PME}, nonperiodic_forcefield_kwargs=None, small_molecule_forcefields='gaff-2.11', complex_box_dimensions=None, apo_box_dimensions=None, flatten_torsions=False, flatten_exceptions=False, repartitioned_endstate=None, **kwargs): """ arguments protein_filename : str path to protein (to mutate); .pdb mutation_chain_id : str name of the chain to be mutated mutation_residue_id : str residue id to change proposed_residue : str three letter code of the residue to mutate to phase : str, default complex if phase == vacuum, then the complex will not be solvated with water; else, it will be solvated with tip3p conduct_endstate_validation : bool, default True whether to conduct an endstate validation of the HybridTopologyFactory. If using the RepartitionedHybridTopologyFactory, endstate validation cannot and will not be conducted. ligand_file : str, default None path to ligand of interest (i.e. small molecule or protein); .sdf or .pdb ligand_index : int, default 0 which ligand to use water_model : str, default 'tip3p' solvent model to use for solvation ionic_strength : float * unit.molar, default 0.15 * unit.molar the total concentration of ions (both positive and negative) to add using Modeller. This does not include ions that are added to neutralize the system. Note that only monovalent ions are currently supported. forcefield_files : list of str, default ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml'] forcefield files for proteins and solvent barostat : openmm.MonteCarloBarostat, default openmm.MonteCarloBarostat(1.0 * unit.atmosphere, 300 * unit.kelvin, 50) barostat to use forcefield_kwargs : dict, default {'removeCMMotion': False, 'ewaldErrorTolerance': 1e-4, 'constraints' : app.HBonds, 'hydrogenMass' : 4 * unit.amus} forcefield kwargs for system parametrization periodic_forcefield_kwargs : dict, default {'nonbondedMethod': app.PME} periodic forcefield kwargs for system parametrization nonperiodic_forcefield_kwargs : dict, default None non-periodic forcefield kwargs for system parametrization small_molecule_forcefields : str, default 'gaff-2.11' the forcefield string for small molecule parametrization complex_box_dimensions : Vec3, default None define box dimensions of complex phase; if None, padding is 1nm apo_box_dimensions : Vec3, default None define box dimensions of apo phase phase; if None, padding is 1nm flatten_torsions : bool, default False in the htf, flatten torsions involving unique new atoms at lambda = 0 and unique old atoms are lambda = 1 flatten_exceptions : bool, default False in the htf, flatten exceptions involving unique new atoms at lambda = 0 and unique old atoms at lambda = 1 repartitioned_endstate : int, default None the endstate (0 or 1) at which to build the RepartitionedHybridTopologyFactory. By default, this is None, meaning a vanilla HybridTopologyFactory will be built. TODO : allow argument for spectator ligands besides the 'ligand_file' """ # First thing to do is load the apo protein to mutate... protein_pdbfile = open(protein_filename, 'r') protein_pdb = app.PDBFile(protein_pdbfile) protein_pdbfile.close() protein_positions, protein_topology, protein_md_topology = protein_pdb.positions, protein_pdb.topology, md.Topology.from_openmm(protein_pdb.topology) protein_topology = protein_md_topology.to_openmm() protein_n_atoms = protein_md_topology.n_atoms # Load the ligand, if present molecules = [] if ligand_input: if isinstance(ligand_input, str): if ligand_input.endswith('.sdf'): # small molecule ligand_mol = createOEMolFromSDF(ligand_input, index=ligand_index) molecules.append(Molecule.from_openeye(ligand_mol, allow_undefined_stereo=False)) ligand_positions, ligand_topology = extractPositionsFromOEMol(ligand_mol), forcefield_generators.generateTopologyFromOEMol(ligand_mol) ligand_md_topology = md.Topology.from_openmm(ligand_topology) ligand_n_atoms = ligand_md_topology.n_atoms if ligand_input.endswith('pdb'): # protein ligand_pdbfile = open(ligand_input, 'r') ligand_pdb = app.PDBFile(ligand_pdbfile) ligand_pdbfile.close() ligand_positions, ligand_topology, ligand_md_topology = ligand_pdb.positions, ligand_pdb.topology, md.Topology.from_openmm( ligand_pdb.topology) ligand_n_atoms = ligand_md_topology.n_atoms elif isinstance(ligand_input, oechem.OEMol): # oemol object molecules.append(Molecule.from_openeye(ligand_input, allow_undefined_stereo=False)) ligand_positions, ligand_topology = extractPositionsFromOEMol(ligand_input), forcefield_generators.generateTopologyFromOEMol(ligand_input) ligand_md_topology = md.Topology.from_openmm(ligand_topology) ligand_n_atoms = ligand_md_topology.n_atoms else: _logger.warning(f'ligand filetype not recognised. Please provide a path to a .pdb or .sdf file') return # Now create a complex complex_md_topology = protein_md_topology.join(ligand_md_topology) complex_topology = complex_md_topology.to_openmm() complex_positions = unit.Quantity(np.zeros([protein_n_atoms + ligand_n_atoms, 3]), unit=unit.nanometers) complex_positions[:protein_n_atoms, :] = protein_positions complex_positions[protein_n_atoms:, :] = ligand_positions # Now for a system_generator self.system_generator = SystemGenerator(forcefields=forcefield_files, barostat=barostat, forcefield_kwargs=forcefield_kwargs, periodic_forcefield_kwargs=periodic_forcefield_kwargs, nonperiodic_forcefield_kwargs=nonperiodic_forcefield_kwargs, small_molecule_forcefield=small_molecule_forcefields, molecules=molecules, cache=None) # Solvate apo and complex... apo_input = list(self._solvate(protein_topology, protein_positions, water_model, phase, ionic_strength, apo_box_dimensions)) inputs = [apo_input] if ligand_input: inputs.append(self._solvate(complex_topology, complex_positions, water_model, phase, ionic_strength, complex_box_dimensions)) geometry_engine = FFAllAngleGeometryEngine(metadata=None, use_sterics=False, n_bond_divisions=100, n_angle_divisions=180, n_torsion_divisions=360, verbose=True, storage=None, bond_softening_constant=1.0, angle_softening_constant=1.0, neglect_angles = False, use_14_nonbondeds = True) # Run pipeline... htfs = [] for (top, pos, sys) in inputs: point_mutation_engine = PointMutationEngine(wildtype_topology=top, system_generator=self.system_generator, chain_id=mutation_chain_id, # Denote the chain id allowed to mutate (it's always a string variable) max_point_mutants=1, residues_allowed_to_mutate=[mutation_residue_id], # The residue ids allowed to mutate allowed_mutations=[(mutation_residue_id, proposed_residue)], # The residue ids allowed to mutate with the three-letter code allowed to change aggregate=True) # Always allow aggregation topology_proposal = point_mutation_engine.propose(sys, top) # Only validate energy bookkeeping if the WT and proposed residues do not involve rings old_res = [res for res in top.residues() if res.id == mutation_residue_id][0] validate_bool = False if old_res.name in ring_amino_acids or proposed_residue in ring_amino_acids else True new_positions, logp_proposal = geometry_engine.propose(topology_proposal, pos, beta, validate_energy_bookkeeping=validate_bool) logp_reverse = geometry_engine.logp_reverse(topology_proposal, new_positions, pos, beta, validate_energy_bookkeeping=validate_bool) if repartitioned_endstate is None: factory = HybridTopologyFactory elif repartitioned_endstate in [0, 1]: factory = RepartitionedHybridTopologyFactory forward_htf = factory(topology_proposal=topology_proposal, current_positions=pos, new_positions=new_positions, use_dispersion_correction=False, functions=None, softcore_alpha=None, bond_softening_constant=1.0, angle_softening_constant=1.0, soften_only_new=False, neglected_new_angle_terms=[], neglected_old_angle_terms=[], softcore_LJ_v2=True, softcore_electrostatics=True, softcore_LJ_v2_alpha=0.85, softcore_electrostatics_alpha=0.3, softcore_sigma_Q=1.0, interpolate_old_and_new_14s=flatten_exceptions, omitted_terms=None, endstate=repartitioned_endstate, flatten_torsions=flatten_torsions) if not topology_proposal.unique_new_atoms: assert geometry_engine.forward_final_context_reduced_potential == None, f"There are no unique new atoms but the geometry_engine's final context reduced potential is not None (i.e. {self._geometry_engine.forward_final_context_reduced_potential})" assert geometry_engine.forward_atoms_with_positions_reduced_potential == None, f"There are no unique new atoms but the geometry_engine's forward atoms-with-positions-reduced-potential in not None (i.e. { self._geometry_engine.forward_atoms_with_positions_reduced_potential})" else: added_valence_energy = geometry_engine.forward_final_context_reduced_potential - geometry_engine.forward_atoms_with_positions_reduced_potential if not topology_proposal.unique_old_atoms: assert geometry_engine.reverse_final_context_reduced_potential == None, f"There are no unique old atoms but the geometry_engine's final context reduced potential is not None (i.e. {self._geometry_engine.reverse_final_context_reduced_potential})" assert geometry_engine.reverse_atoms_with_positions_reduced_potential == None, f"There are no unique old atoms but the geometry_engine's atoms-with-positions-reduced-potential in not None (i.e. { self._geometry_engine.reverse_atoms_with_positions_reduced_potential})" subtracted_valence_energy = 0.0 else: subtracted_valence_energy = geometry_engine.reverse_final_context_reduced_potential - geometry_engine.reverse_atoms_with_positions_reduced_potential if conduct_endstate_validation and repartitioned_endstate is None: zero_state_error, one_state_error = validate_endstate_energies(forward_htf._topology_proposal, forward_htf, added_valence_energy, subtracted_valence_energy, beta=beta, ENERGY_THRESHOLD=ENERGY_THRESHOLD) if zero_state_error > ENERGY_THRESHOLD: _logger.warning(f"Reduced potential difference of the nonalchemical and alchemical Lambda = 0 state is above the threshold ({ENERGY_THRESHOLD}): {zero_state_error}") if one_state_error > ENERGY_THRESHOLD: _logger.warning(f"Reduced potential difference of the nonalchemical and alchemical Lambda = 1 state is above the threshold ({ENERGY_THRESHOLD}): {one_state_error}") else: pass htfs.append(forward_htf) self.apo_htf = htfs[0] self.complex_htf = htfs[1] if ligand_input else None
def generate_complex_topologies_and_positions(ligand_filename, protein_pdb_filename): """ Generate the topologies and positions for complex phase simulations, given an input ligand file (in supported openeye format) and protein pdb file. Note that the input ligand file should have coordinates placing the ligand in the binding site. Parameters ---------- ligand_filename : str Name of the file containing ligands protein_pdb_filename : str Name of the protein pdb file Returns ------- complex_topologies_dict : dict of smiles: md.topology Dictionary of topologies for various complex systems complex_positions_dict : dict of smiles: [n, 3] array of Quantity Positions for corresponding complexes """ ifs = oechem.oemolistream() ifs.open(ligand_filename) # get the list of molecules mol_list = [oechem.OEMol(mol) for mol in ifs.GetOEMols()] for idx, mol in enumerate(mol_list): mol.SetTitle("MOL{}".format(idx)) oechem.OETriposAtomNames(mol) mol_dict = {oechem.OEMolToSmiles(mol): mol for mol in mol_list} ligand_topology_dict = { smiles: forcefield_generators.generateTopologyFromOEMol(mol) for smiles, mol in mol_dict.items() } protein_pdbfile = open(protein_pdb_filename, 'r') pdb_file = app.PDBFile(protein_pdbfile) protein_pdbfile.close() receptor_positions = pdb_file.positions receptor_topology = pdb_file.topology receptor_md_topology = md.Topology.from_openmm(receptor_topology) n_receptor_atoms = receptor_md_topology.n_atoms complex_topologies = {} complex_positions_dict = {} for smiles, ligand_topology in ligand_topology_dict.items(): ligand_md_topology = md.Topology.from_openmm(ligand_topology) n_complex_atoms = ligand_md_topology.n_atoms + n_receptor_atoms copy_receptor_md_topology = copy.deepcopy(receptor_md_topology) complex_positions = unit.Quantity(np.zeros([n_complex_atoms, 3]), unit=unit.nanometers) complex_topology = copy_receptor_md_topology.join(ligand_md_topology) complex_topologies[smiles] = complex_topology ligand_positions = extractPositionsFromOEMol(mol_dict[smiles]) complex_positions[:n_receptor_atoms, :] = receptor_positions complex_positions[n_receptor_atoms:, :] = ligand_positions complex_positions_dict[smiles] = complex_positions return complex_topologies, complex_positions_dict
def generate_solvated_hybrid_test_topology(current_mol_name="naphthalene", proposed_mol_name="benzene", current_mol_smiles=None, proposed_mol_smiles=None, vacuum=False, render_atom_mapping=False): """ This function will generate a topology proposal, old positions, and new positions with a geometry proposal (either vacuum or solvated) given a set of input iupacs or smiles. The function will (by default) read the iupac names first. If they are set to None, then it will attempt to read a set of current and new smiles. An atom mapping pdf will be generated if specified. Arguments ---------- current_mol_name : str, optional name of the first molecule proposed_mol_name : str, optional name of the second molecule current_mol_smiles : str (default None) current mol smiles proposed_mol_smiles : str (default None) proposed mol smiles vacuum: bool (default False) whether to render a vacuum or solvated topology_proposal render_atom_mapping : bool (default False) whether to render the atom map of the current_mol_name and proposed_mol_name Returns ------- topology_proposal : perses.rjmc.topology_proposal The topology proposal representing the transformation current_positions : np.array, unit-bearing The positions of the initial system new_positions : np.array, unit-bearing The positions of the new system """ import simtk.openmm.app as app from openmoltools import forcefield_generators from openeye import oechem from openmoltools.openeye import iupac_to_oemol, generate_conformers, smiles_to_oemol from openmoltools import forcefield_generators import perses.utils.openeye as openeye from perses.utils.data import get_data_filename from perses.rjmc.topology_proposal import TopologyProposal, SystemGenerator, SmallMoleculeSetProposalEngine import simtk.unit as unit from perses.rjmc.geometry import FFAllAngleGeometryEngine if current_mol_name != None and proposed_mol_name != None: try: old_oemol, new_oemol = iupac_to_oemol( current_mol_name), iupac_to_oemol(proposed_mol_name) old_smiles = oechem.OECreateSmiString( old_oemol, oechem.OESMILESFlag_DEFAULT | oechem.OESMILESFlag_Hydrogens) new_smiles = oechem.OECreateSmiString( new_oemol, oechem.OESMILESFlag_DEFAULT | oechem.OESMILESFlag_Hydrogens) except: raise Exception( f"either {current_mol_name} or {proposed_mol_name} is not compatible with 'iupac_to_oemol' function!" ) elif current_mol_smiles != None and proposed_mol_smiles != None: try: old_oemol, new_oemol = smiles_to_oemol( current_mol_smiles), smiles_to_oemol(proposed_mol_smiles) old_smiles = oechem.OECreateSmiString( old_oemol, oechem.OESMILESFlag_DEFAULT | oechem.OESMILESFlag_Hydrogens) new_smiles = oechem.OECreateSmiString( new_oemol, oechem.OESMILESFlag_DEFAULT | oechem.OESMILESFlag_Hydrogens) except: raise Exception(f"the variables are not compatible") else: raise Exception( f"either current_mol_name and proposed_mol_name must be specified as iupacs OR current_mol_smiles and proposed_mol_smiles must be specified as smiles strings." ) old_oemol, old_system, old_positions, old_topology = openeye.createSystemFromSMILES( old_smiles, title="MOL") #correct the old positions old_positions = openeye.extractPositionsFromOEMol(old_oemol) old_positions = old_positions.in_units_of(unit.nanometers) new_oemol, new_system, new_positions, new_topology = openeye.createSystemFromSMILES( new_smiles, title="NEW") ffxml = forcefield_generators.generateForceFieldFromMolecules( [old_oemol, new_oemol]) old_oemol.SetTitle('MOL') new_oemol.SetTitle('MOL') old_topology = forcefield_generators.generateTopologyFromOEMol(old_oemol) new_topology = forcefield_generators.generateTopologyFromOEMol(new_oemol) if not vacuum: nonbonded_method = app.PME barostat = openmm.MonteCarloBarostat(1.0 * unit.atmosphere, 300.0 * unit.kelvin, 50) else: nonbonded_method = app.NoCutoff barostat = None gaff_xml_filename = get_data_filename("data/gaff.xml") system_generator = SystemGenerator( [gaff_xml_filename, 'amber99sbildn.xml', 'tip3p.xml'], barostat=barostat, forcefield_kwargs={ 'removeCMMotion': False, 'nonbondedMethod': nonbonded_method, 'constraints': app.HBonds, 'hydrogenMass': 4.0 * unit.amu }) system_generator._forcefield.loadFile(StringIO(ffxml)) proposal_engine = SmallMoleculeSetProposalEngine([old_smiles, new_smiles], system_generator, residue_name='MOL') geometry_engine = FFAllAngleGeometryEngine(metadata=None, use_sterics=False, n_bond_divisions=1000, n_angle_divisions=180, n_torsion_divisions=360, verbose=True, storage=None, bond_softening_constant=1.0, angle_softening_constant=1.0, neglect_angles=False) if not vacuum: #now to solvate modeller = app.Modeller(old_topology, old_positions) hs = [ atom for atom in modeller.topology.atoms() if atom.element.symbol in ['H'] and atom.residue.name not in ['MOL', 'OLD', 'NEW'] ] modeller.delete(hs) modeller.addHydrogens(forcefield=system_generator._forcefield) modeller.addSolvent(system_generator._forcefield, model='tip3p', padding=9.0 * unit.angstroms) solvated_topology = modeller.getTopology() solvated_positions = modeller.getPositions() solvated_positions = unit.quantity.Quantity(value=np.array([ list(atom_pos) for atom_pos in solvated_positions.value_in_unit_system(unit.md_unit_system) ]), unit=unit.nanometers) solvated_system = system_generator.build_system(solvated_topology) #now to create proposal top_proposal = proposal_engine.propose( current_system=solvated_system, current_topology=solvated_topology, current_mol=old_oemol, proposed_mol=new_oemol) new_positions, _ = geometry_engine.propose(top_proposal, solvated_positions, beta) if render_atom_mapping: from perses.utils.smallmolecules import render_atom_mapping print( f"new_to_old: {proposal_engine.non_offset_new_to_old_atom_map}" ) render_atom_mapping(f"{old_smiles}to{new_smiles}.png", old_oemol, new_oemol, proposal_engine.non_offset_new_to_old_atom_map) return top_proposal, solvated_positions, new_positions else: vacuum_system = system_generator.build_system(old_topology) top_proposal = proposal_engine.propose(current_system=vacuum_system, current_topology=old_topology, current_mol=old_oemol, proposed_mol=new_oemol) new_positions, _ = geometry_engine.propose(top_proposal, old_positions, beta) if render_atom_mapping: from perses.utils.smallmolecules import render_atom_mapping print(f"new_to_old: {top_proposal._new_to_old_atom_map}") render_atom_mapping(f"{old_smiles}to{new_smiles}.png", old_oemol, new_oemol, top_proposal._new_to_old_atom_map) return top_proposal, old_positions, new_positions
def compare_energies(mol_name="naphthalene", ref_mol_name="benzene", atom_expression=['Hybridization'], bond_expression=['Hybridization']): """ Make an atom map where the molecule at either lambda endpoint is identical, and check that the energies are also the same. """ from openmmtools.constants import kB from openmmtools import alchemy, states from perses.rjmc.topology_proposal import SmallMoleculeSetProposalEngine from perses.annihilation.relative import HybridTopologyFactory from perses.rjmc.geometry import FFAllAngleGeometryEngine import simtk.openmm as openmm from perses.utils.openeye import iupac_to_oemol, extractPositionsFromOEMol, generate_conformers from perses.utils.openeye import generate_expression from openmmforcefields.generators import SystemGenerator from openmoltools.forcefield_generators import generateTopologyFromOEMol from perses.tests.utils import validate_endstate_energies temperature = 300 * unit.kelvin # Compute kT and inverse temperature. kT = kB * temperature beta = 1.0 / kT ENERGY_THRESHOLD = 1e-6 atom_expr, bond_expr = generate_expression( atom_expression), generate_expression(bond_expression) mol = iupac_to_oemol(mol_name) mol = generate_conformers(mol, max_confs=1) refmol = iupac_to_oemol(ref_mol_name) refmol = generate_conformers(refmol, max_confs=1) from openforcefield.topology import Molecule molecules = [Molecule.from_openeye(oemol) for oemol in [refmol, mol]] barostat = None forcefield_files = ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml'] forcefield_kwargs = { 'removeCMMotion': False, 'ewaldErrorTolerance': 1e-4, 'nonbondedMethod': app.NoCutoff, 'constraints': app.HBonds, 'hydrogenMass': 4 * unit.amus } system_generator = SystemGenerator(forcefields=forcefield_files, barostat=barostat, forcefield_kwargs=forcefield_kwargs, small_molecule_forcefield='gaff-2.11', molecules=molecules, cache=None) topology = generateTopologyFromOEMol(refmol) system = system_generator.create_system(topology) positions = extractPositionsFromOEMol(refmol) proposal_engine = SmallMoleculeSetProposalEngine([refmol, mol], system_generator) proposal = proposal_engine.propose(system, topology, atom_expr=atom_expr, bond_expr=bond_expr) geometry_engine = FFAllAngleGeometryEngine() new_positions, _ = geometry_engine.propose( proposal, positions, beta=beta, validate_energy_bookkeeping=False) _ = geometry_engine.logp_reverse(proposal, new_positions, positions, beta) #make a topology proposal with the appropriate data: factory = HybridTopologyFactory(proposal, positions, new_positions) if not proposal.unique_new_atoms: assert geometry_engine.forward_final_context_reduced_potential == None, f"There are no unique new atoms but the geometry_engine's final context reduced potential is not None (i.e. {self._geometry_engine.forward_final_context_reduced_potential})" assert geometry_engine.forward_atoms_with_positions_reduced_potential == None, f"There are no unique new atoms but the geometry_engine's forward atoms-with-positions-reduced-potential in not None (i.e. { self._geometry_engine.forward_atoms_with_positions_reduced_potential})" vacuum_added_valence_energy = 0.0 else: added_valence_energy = geometry_engine.forward_final_context_reduced_potential - geometry_engine.forward_atoms_with_positions_reduced_potential if not proposal.unique_old_atoms: assert geometry_engine.reverse_final_context_reduced_potential == None, f"There are no unique old atoms but the geometry_engine's final context reduced potential is not None (i.e. {self._geometry_engine.reverse_final_context_reduced_potential})" assert geometry_engine.reverse_atoms_with_positions_reduced_potential == None, f"There are no unique old atoms but the geometry_engine's atoms-with-positions-reduced-potential in not None (i.e. { self._geometry_engine.reverse_atoms_with_positions_reduced_potential})" subtracted_valence_energy = 0.0 else: subtracted_valence_energy = geometry_engine.reverse_final_context_reduced_potential - geometry_engine.reverse_atoms_with_positions_reduced_potential zero_state_error, one_state_error = validate_endstate_energies( factory._topology_proposal, factory, added_valence_energy, subtracted_valence_energy, beta=1.0 / (kB * temperature), ENERGY_THRESHOLD=ENERGY_THRESHOLD, platform=openmm.Platform.getPlatformByName('Reference')) return factory
def __init__(self, molecules: List[str], output_filename: str, ncmc_switching_times: Dict[str, int], equilibrium_steps: Dict[str, int], timestep: unit.Quantity, initial_molecule: str=None, geometry_options: Dict=None): self._molecules = [SmallMoleculeSetProposalEngine.canonicalize_smiles(molecule) for molecule in molecules] environments = ['explicit', 'vacuum'] temperature = 298.15 * unit.kelvin pressure = 1.0 * unit.atmospheres constraints = app.HBonds self._storage = NetCDFStorage(output_filename) self._ncmc_switching_times = ncmc_switching_times self._n_equilibrium_steps = equilibrium_steps self._geometry_options = geometry_options # Create a system generator for our desired forcefields. from perses.rjmc.topology_proposal import SystemGenerator system_generators = dict() from pkg_resources import resource_filename gaff_xml_filename = resource_filename('perses', 'data/gaff.xml') barostat = openmm.MonteCarloBarostat(pressure, temperature) system_generators['explicit'] = SystemGenerator([gaff_xml_filename, 'tip3p.xml'], forcefield_kwargs={'nonbondedCutoff': 9.0 * unit.angstrom, 'implicitSolvent': None, 'constraints': constraints, 'ewaldErrorTolerance': 1e-5, 'hydrogenMass': 3.0*unit.amu}, periodic_forcefield_kwargs = {'nonbondedMethod': app.PME} barostat=barostat) system_generators['vacuum'] = SystemGenerator([gaff_xml_filename], forcefield_kwargs={'implicitSolvent': None, 'constraints': constraints, 'hydrogenMass': 3.0*unit.amu}, nonperiodic_forcefield_kwargs = {'nonbondedMethod': app.NoCutoff}) # # Create topologies and positions # topologies = dict() positions = dict() from openmoltools import forcefield_generators forcefield = app.ForceField(gaff_xml_filename, 'tip3p.xml') forcefield.registerTemplateGenerator(forcefield_generators.gaffTemplateGenerator) # Create molecule in vacuum. from perses.utils.openeye import extractPositionsFromOEMol from openmoltools.openeye import smiles_to_oemol, generate_conformers if initial_molecule: smiles = initial_molecule else: smiles = np.random.choice(molecules) molecule = smiles_to_oemol(smiles) molecule = generate_conformers(molecule, max_confs=1) topologies['vacuum'] = forcefield_generators.generateTopologyFromOEMol(molecule) positions['vacuum'] = extractPositionsFromOEMol(molecule) # Create molecule in solvent. modeller = app.Modeller(topologies['vacuum'], positions['vacuum']) modeller.addSolvent(forcefield, model='tip3p', padding=9.0 * unit.angstrom) topologies['explicit'] = modeller.getTopology() positions['explicit'] = modeller.getPositions() # Set up the proposal engines. proposal_metadata = {} proposal_engines = dict() for environment in environments: proposal_engines[environment] = SmallMoleculeSetProposalEngine(self._molecules, system_generators[environment]) # Generate systems systems = dict() for environment in environments: systems[environment] = system_generators[environment].build_system(topologies[environment]) # Define thermodynamic state of interest. thermodynamic_states = dict() thermodynamic_states['explicit'] = states.ThermodynamicState(system=systems['explicit'], temperature=temperature, pressure=pressure) thermodynamic_states['vacuum'] = states.ThermodynamicState(system=systems['vacuum'], temperature=temperature) # Create SAMS samplers from perses.samplers.samplers import ExpandedEnsembleSampler, SAMSSampler mcmc_samplers = dict() exen_samplers = dict() sams_samplers = dict() for environment in environments: storage = NetCDFStorageView(self._storage, envname=environment) if self._geometry_options: n_torsion_divisions = self._geometry_options['n_torsion_divsions'][environment] use_sterics = self._geometry_options['use_sterics'][environment] else: n_torsion_divisions = 180 use_sterics = False geometry_engine = geometry.FFAllAngleGeometryEngine(storage=storage, n_torsion_divisions=n_torsion_divisions, use_sterics=use_sterics) move = mcmc.LangevinSplittingDynamicsMove(timestep=timestep, splitting="V R O R V", n_restart_attempts=10) chemical_state_key = proposal_engines[environment].compute_state_key(topologies[environment]) if environment == 'explicit': sampler_state = states.SamplerState(positions=positions[environment], box_vectors=systems[environment].getDefaultPeriodicBoxVectors()) else: sampler_state = states.SamplerState(positions=positions[environment]) mcmc_samplers[environment] = mcmc.MCMCSampler(thermodynamic_states[environment], sampler_state, move) exen_samplers[environment] = ExpandedEnsembleSampler(mcmc_samplers[environment], topologies[environment], chemical_state_key, proposal_engines[environment], geometry_engine, options={'nsteps': self._ncmc_switching_times[environment]}, storage=storage, ncmc_write_interval=self._ncmc_switching_times[environment]) exen_samplers[environment].verbose = True sams_samplers[environment] = SAMSSampler(exen_samplers[environment], storage=storage) sams_samplers[environment].verbose = True # Create test MultiTargetDesign sampler. from perses.samplers.samplers import MultiTargetDesign target_samplers = {sams_samplers['explicit']: 1.0, sams_samplers['vacuum']: -1.0} designer = MultiTargetDesign(target_samplers, storage=self._storage) # Store things. self.molecules = molecules self.environments = environments self.topologies = topologies self.positions = positions self.system_generators = system_generators self.proposal_engines = proposal_engines self.thermodynamic_states = thermodynamic_states self.mcmc_samplers = mcmc_samplers self.exen_samplers = exen_samplers self.sams_samplers = sams_samplers self.designer = designer