def _oemol_from_residue(res):
    """
    Build an OEMol for one residue, including a residue that is part of a polymer.

    Every bond reported by ``res.external_bonds()`` is removed from the bond
    list of the topology that owns the residue (``res.chain.topology._bonds``),
    so that topology is modified in place. The residue is then converted with
    ``generateOEMolFromTopologyResidue`` (no geometry) and explicit hydrogens
    are added with ``OEAddExplicitHydrogens``.

    Parameters
    ----------
    res : app.Residue
        The residue in question

    Returns
    -------
    oemol : openeye.oechem.OEMol
        an oemol representation of the residue, with explicit hydrogens added
    """
    from openeye import oechem
    from openmoltools.forcefield_generators import generateOEMolFromTopologyResidue

    # Snapshot the external bonds first: the loop edits the list they come from.
    for inter_residue_bond in list(res.external_bonds()):
        res.chain.topology._bonds.remove(inter_residue_bond)

    oemol = generateOEMolFromTopologyResidue(res, geometry=False)
    # NOTE(review): presumably this caps the valences left open by the removed bonds - confirm.
    oechem.OEAddExplicitHydrogens(oemol)
    return oemol
def test_generate_Topology_and_OEMol(self):
    """
    Round-trip every IUPAC test molecule OEMol >> Topology >> OEMol and
    compare titles, atom names, atomic numbers and bond endpoints pairwise.
    """
    from openmoltools.forcefield_generators import generateTopologyFromOEMol, generateOEMolFromTopologyResidue
    from openeye import oechem, oeiupac

    for iupac_name in IUPAC_molecule_names:
        source_mol = createOEMolFromIUPAC(iupac_name)

        # OEMol -> Topology
        topology = generateTopologyFromOEMol(source_mol)

        # The topology must hold exactly one residue, named after the molecule title.
        residue_list = list(topology.residues())
        self.assertEqual(len(residue_list), 1)
        self.assertEqual(residue_list[0].name, source_mol.GetTitle())

        # Atoms and bonds are compared pairwise, in iteration order.
        for top_atom, oe_atom in zip(topology.atoms(), source_mol.GetAtoms()):
            self.assertEqual(top_atom.name, oe_atom.GetName())
        for top_bond, oe_bond in zip(topology.bonds(), source_mol.GetBonds()):
            self.assertEqual(top_bond[0].name, oe_bond.GetBgn().GetName())
            self.assertEqual(top_bond[1].name, oe_bond.GetEnd().GetName())

        # Topology -> OEMol
        rebuilt_mol = generateOEMolFromTopologyResidue(residue_list[0])

        # The rebuilt molecule must agree with the one we started from.
        self.assertEqual(source_mol.GetTitle(), rebuilt_mol.GetTitle())
        for src_atom, new_atom in zip(source_mol.GetAtoms(), rebuilt_mol.GetAtoms()):
            self.assertEqual(src_atom.GetName(), new_atom.GetName())
            self.assertEqual(src_atom.GetAtomicNum(), new_atom.GetAtomicNum())
        for src_bond, new_bond in zip(source_mol.GetBonds(), rebuilt_mol.GetBonds()):
            self.assertEqual(src_bond.GetBgn().GetName(), new_bond.GetBgn().GetName())
            self.assertEqual(src_bond.GetEnd().GetName(), new_bond.GetEnd().GetName())
def test_small_molecule_proposals():
    """
    Check that the small molecule proposal engine produces the molecules it claims.

    Starting from butane, 50 proposals are drawn. On each pass the 'MOL'
    residue of the current proposal's new topology is converted back to an
    OEMol, and its SMILES must equal that proposal's new chemical state key.
    """
    from perses.rjmc import topology_proposal
    from openmoltools import forcefield_generators
    import openeye.oechem as oechem

    list_of_smiles = ['CCCC','CCCCC','CCCCCC']
    gaff_xml_filename = get_data_filename('data/gaff.xml')
    # Per-SMILES tally of how often each state was proposed.
    stats_dict = dict.fromkeys(list_of_smiles, 0)

    system_generator = topology_proposal.SystemGenerator([gaff_xml_filename])
    proposal_engine = topology_proposal.SmallMoleculeSetProposalEngine(list_of_smiles, system_generator)

    initial_molecule = generate_initial_molecule('CCCC')
    initial_system, _, initial_topology = oemol_to_omm_ff(initial_molecule, "MOL")
    proposal = proposal_engine.propose(initial_system, initial_topology)

    for _ in range(50):
        # Positions are ignored here, and we don't want to run the geometry engine.
        new_proposal = proposal_engine.propose(proposal.old_system, proposal.old_topology)
        stats_dict[new_proposal.new_chemical_state_key] += 1

        # Check that the molecule it generated is actually the SMILES we expect.
        mol_residues = [
            residue for residue in proposal.new_topology.residues()
            if residue.name == 'MOL'
        ]
        if len(mol_residues) != 1:
            raise ValueError("More than one residue with the same name!")
        rebuilt_mol = forcefield_generators.generateOEMolFromTopologyResidue(mol_residues[0])
        assert oechem.OEMolToSmiles(rebuilt_mol) == proposal.new_chemical_state_key

        proposal = new_proposal
def test_small_molecule_proposals():
    """
    Exercise the small molecule proposal engine over a set of three alkanes.

    Starting from butane, 50 proposals are drawn. On every iteration the
    single 'MOL' residue of the current proposal's new topology is rebuilt as
    an OEMol, and its SMILES is compared with the proposal's new state key.
    """
    from perses.rjmc import topology_proposal
    from openmoltools import forcefield_generators
    import openeye.oechem as oechem

    list_of_smiles = ['CCCC','CCCCC','CCCCCC']
    gaff_xml_filename = get_data_filename('data/gaff.xml')
    # Count how many times each SMILES is proposed.
    stats_dict = dict.fromkeys(list_of_smiles, 0)

    system_generator = topology_proposal.SystemGenerator([gaff_xml_filename])
    proposal_engine = topology_proposal.SmallMoleculeSetProposalEngine(list_of_smiles, system_generator)

    initial_molecule = generate_initial_molecule('CCCC')
    initial_system, _, initial_topology = oemol_to_omm_ff(initial_molecule, "MOL")
    proposal = proposal_engine.propose(initial_system, initial_topology)

    for _ in range(50):
        # Positions are ignored here, and we don't want to run the geometry engine.
        new_proposal = proposal_engine.propose(proposal.old_system, proposal.old_topology)
        stats_dict[new_proposal.new_chemical_state_key] += 1

        # Check that the molecule it generated is actually the SMILES we expect.
        mol_residues = [
            residue for residue in proposal.new_topology.residues()
            if residue.name == 'MOL'
        ]
        if len(mol_residues) != 1:
            raise ValueError("More than one residue with the same name!")
        rebuilt_mol = forcefield_generators.generateOEMolFromTopologyResidue(mol_residues[0])
        assert oechem.OEMolToSmiles(rebuilt_mol) == proposal.new_chemical_state_key

        proposal = new_proposal
def test_small_molecule_proposals():
    """
    Check that the small molecule proposal engine produces the molecules it claims.

    Three alkane OEMols are built from SMILES and handed to both the system
    generator and the proposal engine. 50 proposals are then drawn; on each
    pass the 'MOL' residue of the current proposal's new topology is rebuilt
    as an OEMol, and its canonicalized SMILES must equal that proposal's new
    chemical state key.
    """
    list_of_smiles = ['CCCC','CCCCC','CCCCCC']
    list_of_mols = [smiles_to_oemol(smi) for smi in list_of_smiles]
    molecules = [Molecule.from_openeye(oemol) for oemol in list_of_mols]
    # Tally of proposed states; unseen keys start at zero.
    stats_dict = defaultdict(int)

    system_generator = SystemGenerator(
        forcefields=forcefield_files,
        barostat=barostat,
        forcefield_kwargs=forcefield_kwargs,
        nonperiodic_forcefield_kwargs=nonperiodic_forcefield_kwargs,
        small_molecule_forcefield=small_molecule_forcefield,
        molecules=molecules,
        cache=None,
    )
    proposal_engine = topology_proposal.SmallMoleculeSetProposalEngine(list_of_mols, system_generator)

    initial_system, _, initial_topology = OEMol_to_omm_ff(list_of_mols[0], system_generator)
    proposal = proposal_engine.propose(initial_system, initial_topology)

    for _ in range(50):
        # Positions are ignored here, and we don't want to run the geometry engine.
        new_proposal = proposal_engine.propose(proposal.old_system, proposal.old_topology)
        stats_dict[new_proposal.new_chemical_state_key] += 1

        # Check that the molecule it generated is actually the SMILES we expect.
        mol_residues = [
            residue for residue in proposal.new_topology.residues()
            if residue.name == 'MOL'
        ]
        if len(mol_residues) != 1:
            raise ValueError("More than one residue with the same name!")
        rebuilt_mol = generateOEMolFromTopologyResidue(mol_residues[0])
        raw_smiles = oechem.OEMolToSmiles(rebuilt_mol)
        smiles = SmallMoleculeSetProposalEngine.canonicalize_smiles(raw_smiles)
        assert smiles == proposal.new_chemical_state_key

        proposal = new_proposal
def _oemol_from_residue(res):
    """
    Convert a residue to an OEMol, also when the residue sits inside a polymer.

    Bonds linking the residue to other residues (``res.external_bonds()``) are
    deleted from ``res.chain.topology._bonds`` before the conversion, which
    means the owning topology is changed in place. After conversion (without
    geometry) explicit hydrogens are added to the molecule.

    Parameters
    ----------
    res : app.Residue
        The residue in question

    Returns
    -------
    oemol : openeye.oechem.OEMol
        an oemol representation of the residue, with explicit hydrogens added
    """
    from openeye import oechem
    from openmoltools.forcefield_generators import generateOEMolFromTopologyResidue

    # Materialize the external bonds before removing them from the topology.
    bonds_to_drop = list(res.external_bonds())
    for bond in bonds_to_drop:
        res.chain.topology._bonds.remove(bond)

    residue_mol = generateOEMolFromTopologyResidue(res, geometry=False)
    # NOTE(review): presumably the added hydrogens stand in for the removed bonds - confirm.
    oechem.OEAddExplicitHydrogens(residue_mol)
    return residue_mol
def canonicalize_SMILES(smiles_list):
    """Ensure all SMILES strings end up in canonical form.

    Stereochemistry must already have been expanded.

    Each SMILES string is converted to an OEMol, then to a Topology, then back
    to an OEMol from the first residue of that Topology. The result is written
    out with ``OECreateIsoSmiString``.

    Parameters
    ----------
    smiles_list : list of str
        List of SMILES strings

    Returns
    -------
    canonical_smiles_list : list of str
        List of SMILES strings, after canonicalization.
    """
    from openmoltools.forcefield_generators import generateOEMolFromTopologyResidue, generateTopologyFromOEMol
    from openeye import oechem

    def _round_trip(smiles):
        # OEMol -> Topology -> OEMol (first residue) -> isomeric SMILES
        topology = generateTopologyFromOEMol(smiles_to_oemol(smiles))
        first_residue = list(topology.residues())[0]
        rebuilt = generateOEMolFromTopologyResidue(first_residue)
        return oechem.OECreateIsoSmiString(rebuilt)

    return [_round_trip(smiles) for smiles in smiles_list]
def canonicalize_SMILES(smiles_list):
    """Ensure all SMILES strings end up in canonical form.

    Stereochemistry must already have been expanded.

    Each SMILES string is converted to an OEMol, then to a Topology, then back
    to an OEMol from the first residue of that Topology. The result is written
    out with ``OECreateIsoSmiString``.

    Parameters
    ----------
    smiles_list : list of str
        List of SMILES strings

    Returns
    -------
    canonical_smiles_list : list of str
        List of SMILES strings, after canonicalization.
    """
    from openmoltools.forcefield_generators import generateOEMolFromTopologyResidue, generateTopologyFromOEMol
    from perses.utils.openeye import smiles_to_oemol
    from openeye import oechem

    def _round_trip(smiles):
        # OEMol -> Topology -> OEMol (first residue) -> isomeric SMILES
        topology = generateTopologyFromOEMol(smiles_to_oemol(smiles))
        first_residue = list(topology.residues())[0]
        rebuilt = generateOEMolFromTopologyResidue(first_residue)
        return oechem.OECreateIsoSmiString(rebuilt)

    return [_round_trip(smiles) for smiles in smiles_list]
def generate_vacuum_hostguest_proposal(current_mol_name="B2", proposed_mol_name="MOL"):
    """
    Generate a test vacuum topology proposal, current positions, and new
    positions triplet for the guest of the ``HostGuestVacuum`` test system.

    The current molecule is rebuilt from the second residue of the test
    system's topology. The proposed molecule is created from a fixed SMILES
    string.

    Parameters
    ----------
    current_mol_name : str, optional
        residue name passed to the proposal engine as ``residue_name``
    proposed_mol_name : str, optional
        accepted for interface compatibility; not used by this function

    Returns
    -------
    topology_proposal : perses.rjmc.topology_proposal
        The topology proposal representing the transformation
    current_positions : np.array, unit-bearing
        The positions of the initial system
    new_positions : np.array, unit-bearing
        The positions of the new system
    """
    from openmoltools import forcefield_generators
    from openmmtools import testsystems
    from perses.tests.utils import createOEMolFromIUPAC, createSystemFromIUPAC, get_data_filename

    host_guest = testsystems.HostGuestVacuum()
    # The test system's own System is read but not used; one is rebuilt from the force field below.
    vacuum_system = host_guest.system
    old_positions = host_guest.positions
    old_topology = host_guest.topology

    all_residues = list(old_topology.residues())
    guest_residue = all_residues[1]  # guest is second residue in topology
    current_mol = forcefield_generators.generateOEMolFromTopologyResidue(guest_residue)
    proposed_mol = createOEMolFromSMILES('C1CC2(CCC1(CC2)C)C')

    initial_smiles = oechem.OEMolToSmiles(current_mol)
    final_smiles = oechem.OEMolToSmiles(proposed_mol)

    # Build the old system with a GAFF template generator registered.
    template_gaff_path = get_data_filename("data/gaff.xml")
    forcefield = app.ForceField(template_gaff_path, 'tip3p.xml')
    forcefield.registerTemplateGenerator(forcefield_generators.gaffTemplateGenerator)
    solvated_system = forcefield.createSystem(old_topology, removeCMMotion=False)

    generator_gaff_path = get_data_filename('data/gaff.xml')
    generator_kwargs = {'removeCMMotion': False, 'nonbondedMethod': app.NoCutoff}
    system_generator = SystemGenerator(
        [generator_gaff_path, 'amber99sbildn.xml', 'tip3p.xml'],
        forcefield_kwargs=generator_kwargs)
    geometry_engine = geometry.FFAllAngleGeometryEngine()
    proposal_engine = SmallMoleculeSetProposalEngine(
        [initial_smiles, final_smiles], system_generator, residue_name=current_mol_name)

    # Generate the topology proposal.
    topology_proposal = proposal_engine.propose(
        solvated_system, old_topology, current_mol=current_mol, proposed_mol=proposed_mol)

    # Generate new positions with the geometry engine; its second return value is discarded.
    new_positions, _ = geometry_engine.propose(topology_proposal, old_positions, beta)

    return topology_proposal, old_positions, new_positions
def write_xml(topology, file_name):
    """
    Write a force-field XML file for the first residue of a topology.

    The first residue is converted to an OEMol (no geometry, Tripos atom
    names), titled 'MOL', and passed to ``generateForceFieldFromMolecules``.
    The returned XML text is written to ``file_name``.

    Parameters
    ----------
    topology : Topology
        Topology whose first residue is parameterized.
    file_name : str
        Path of the XML file to write; an existing file is overwritten.
    """
    residues = list(topology.residues())
    residue = residues[0]
    molOE = generateOEMolFromTopologyResidue(residue, geometry=False, tripos_atom_names=True)
    molOE.SetTitle('MOL')
    ffxml = generateForceFieldFromMolecules([molOE])
    # The context manager closes the handle even if the write raises.
    with open(file_name, 'w') as f:
        f.write(ffxml)
def test_topology_molecules_round_trip():
    """
    Smoke-test repeated conversions between OEMol and Topology.

    There are no assertions: the test passes when every conversion completes
    without raising.
    """
    # Create a test set of molecules.
    molecules = [createOEMolFromIUPAC(iupac_name) for iupac_name in IUPAC_molecule_names]

    # Test round-trips.
    from openmoltools.forcefield_generators import generateTopologyFromOEMol, generateOEMolFromTopologyResidue

    for original_mol in molecules:
        # Molecule -> topology -> molecule (first residue).
        first_topology = generateTopologyFromOEMol(original_mol)
        first_residues = list(first_topology.residues())
        rebuilt_mol = generateOEMolFromTopologyResidue(first_residues[0])

        # Rebuilt molecule -> topology again.
        second_topology = generateTopologyFromOEMol(rebuilt_mol)
        second_residues = list(second_topology.residues())

        # Topology -> molecule, once with geometry...
        generateOEMolFromTopologyResidue(second_residues[0], geometry=True)
        # ...and once with Tripos atom names.
        generateOEMolFromTopologyResidue(second_residues[0], tripos_atom_names=True)
def generate_ffxml(pdb_filename, output_filename='imatinib.xml'):
    """
    Generate a force-field XML file for the first residue of a PDB file.

    The first residue of the PDB topology is converted to an OEMol (no
    geometry, Tripos atom names), titled 'MOL', and passed to
    ``generateForceFieldFromMolecules``. The returned XML text is written to
    ``output_filename``.

    Parameters
    ----------
    pdb_filename : str
        Path of the PDB file to read.
    output_filename : str, optional
        Path of the XML file to write. Defaults to 'imatinib.xml', the
        location this function has always written to.
    """
    from simtk.openmm.app import PDBFile
    from openmoltools.forcefield_generators import generateForceFieldFromMolecules, generateOEMolFromTopologyResidue

    pdbfile = PDBFile(pdb_filename)
    residues = list(pdbfile.topology.residues())
    residue = residues[0]

    molecule = generateOEMolFromTopologyResidue(residue, geometry=False, tripos_atom_names=True)
    molecule.SetTitle('MOL')
    ffxml = generateForceFieldFromMolecules([molecule])

    # The context manager closes the handle even if the write raises.
    with open(output_filename, 'w') as outfile:
        outfile.write(ffxml)
def test_atom_topology_index():
    """
    Make sure that generateOEMolFromTopologyResidue adds the topology_index data
    """
    # Create a test set of molecules.
    molecules = [createOEMolFromIUPAC(iupac_name) for iupac_name in IUPAC_molecule_names]

    from openmoltools.forcefield_generators import generateTopologyFromOEMol, generateOEMolFromTopologyResidue

    topologies = [generateTopologyFromOEMol(oemol) for oemol in molecules]
    for topology in topologies:
        residues = list(topology.residues())
        only_residue = residues[0]  # there is only one residue
        regenerated_mol = generateOEMolFromTopologyResidue(only_residue)
        for top_atom in topology.atoms():
            # Look the OE atom up by the topology atom's index, then compare the stored data.
            index_predicate = oechem.OEHasAtomIdx(top_atom.index)
            oeatom = regenerated_mol.GetAtom(index_predicate)
            assert oeatom.GetData("topology_index") == top_atom.index
def test_topology_molecules_round_trip():
    """
    Run repeated OEMol <-> Topology conversions on the IUPAC test molecules.

    Nothing is asserted; the test succeeds when no conversion raises.
    """
    # Create a test set of molecules.
    molecules = [createOEMolFromIUPAC(iupac_name) for iupac_name in IUPAC_molecule_names]

    # Test round-trips.
    from openmoltools.forcefield_generators import generateTopologyFromOEMol, generateOEMolFromTopologyResidue

    for original_mol in molecules:
        # Molecule -> topology -> molecule (first residue).
        first_topology = generateTopologyFromOEMol(original_mol)
        first_residues = list(first_topology.residues())
        rebuilt_mol = generateOEMolFromTopologyResidue(first_residues[0])

        # Rebuilt molecule -> topology again.
        second_topology = generateTopologyFromOEMol(rebuilt_mol)
        second_residues = list(second_topology.residues())

        # Topology -> molecule, once with geometry...
        generateOEMolFromTopologyResidue(second_residues[0], geometry=True)
        # ...and once with Tripos atom names.
        generateOEMolFromTopologyResidue(second_residues[0], tripos_atom_names=True)
def generate_ffxml(pdb_filename, output_filename='imatinib.xml'):
    """
    Generate a force-field XML file for the first residue of a PDB file.

    The first residue of the PDB topology is converted to an OEMol (no
    geometry, Tripos atom names), titled 'MOL', and passed to
    ``generateForceFieldFromMolecules``. The returned XML text is written to
    ``output_filename``.

    Parameters
    ----------
    pdb_filename : str
        Path of the PDB file to read.
    output_filename : str, optional
        Path of the XML file to write. Defaults to 'imatinib.xml', the
        location this function has always written to.
    """
    from simtk.openmm.app import PDBFile
    from openmoltools.forcefield_generators import generateForceFieldFromMolecules, generateOEMolFromTopologyResidue

    pdbfile = PDBFile(pdb_filename)
    residues = list(pdbfile.topology.residues())
    residue = residues[0]

    molecule = generateOEMolFromTopologyResidue(residue, geometry=False, tripos_atom_names=True)
    molecule.SetTitle('MOL')
    ffxml = generateForceFieldFromMolecules([molecule])

    # The context manager closes the handle even if the write raises.
    with open(output_filename, 'w') as outfile:
        outfile.write(ffxml)
def test_atom_topology_index():
    """
    Make sure that generateOEMolFromTopologyResidue adds the topology_index data
    """
    # Create a test set of molecules.
    molecules = [createOEMolFromIUPAC(iupac_name) for iupac_name in IUPAC_molecule_names]

    from openmoltools.forcefield_generators import generateTopologyFromOEMol, generateOEMolFromTopologyResidue

    topologies = [generateTopologyFromOEMol(oemol) for oemol in molecules]
    for topology in topologies:
        residues = list(topology.residues())
        only_residue = residues[0]  # there is only one residue
        regenerated_mol = generateOEMolFromTopologyResidue(only_residue)
        for top_atom in topology.atoms():
            # Look the OE atom up by the topology atom's index, then compare the stored data.
            index_predicate = oechem.OEHasAtomIdx(top_atom.index)
            oeatom = regenerated_mol.GetAtom(index_predicate)
            assert oeatom.GetData("topology_index") == top_atom.index
def test_generate_Topology_and_OEMol(self):
    """
    Check the OEMol >> Topology >> OEMol round trip for each IUPAC test
    molecule: titles, atom names, atomic numbers and bond endpoint names.
    """
    from openmoltools.forcefield_generators import generateTopologyFromOEMol, generateOEMolFromTopologyResidue
    from openeye import oechem, oeiupac

    for iupac_name in IUPAC_molecule_names:
        source_mol = createOEMolFromIUPAC(iupac_name)

        # OEMol -> Topology
        topology = generateTopologyFromOEMol(source_mol)

        # The topology must hold exactly one residue, named after the molecule title.
        residue_list = list(topology.residues())
        self.assertEqual(len(residue_list), 1)
        self.assertEqual(residue_list[0].name, source_mol.GetTitle())

        # Atoms and bonds are compared pairwise, in iteration order.
        for top_atom, oe_atom in zip(topology.atoms(), source_mol.GetAtoms()):
            self.assertEqual(top_atom.name, oe_atom.GetName())
        for top_bond, oe_bond in zip(topology.bonds(), source_mol.GetBonds()):
            self.assertEqual(top_bond[0].name, oe_bond.GetBgn().GetName())
            self.assertEqual(top_bond[1].name, oe_bond.GetEnd().GetName())

        # Topology -> OEMol
        rebuilt_mol = generateOEMolFromTopologyResidue(residue_list[0])

        # The rebuilt molecule must agree with the one we started from.
        self.assertEqual(source_mol.GetTitle(), rebuilt_mol.GetTitle())
        for src_atom, new_atom in zip(source_mol.GetAtoms(), rebuilt_mol.GetAtoms()):
            self.assertEqual(src_atom.GetName(), new_atom.GetName())
            self.assertEqual(src_atom.GetAtomicNum(), new_atom.GetAtomicNum())
        for src_bond, new_bond in zip(source_mol.GetBonds(), rebuilt_mol.GetBonds()):
            self.assertEqual(src_bond.GetBgn().GetName(), new_bond.GetBgn().GetName())
            self.assertEqual(src_bond.GetEnd().GetName(), new_bond.GetEnd().GetName())
def generate_vacuum_hostguest_proposal(current_mol_name="B2", proposed_mol_name="MOL"):
    """
    Generate a test vacuum topology proposal, current positions, and new
    positions triplet for the guest of the ``HostGuestVacuum`` test system.

    The current molecule is rebuilt from the second residue of the test
    system's topology. The proposed molecule is created from a fixed SMILES
    string.

    Parameters
    ----------
    current_mol_name : str, optional
        residue name passed to the proposal engine as ``residue_name``
    proposed_mol_name : str, optional
        accepted for interface compatibility; not used by this function

    Returns
    -------
    topology_proposal : perses.rjmc.topology_proposal
        The topology proposal representing the transformation
    current_positions : np.array, unit-bearing
        The positions of the initial system
    new_positions : np.array, unit-bearing
        The positions of the new system
    """
    from openmoltools import forcefield_generators
    from openmmtools import testsystems
    from perses.utils.openeye import smiles_to_oemol
    from perses.utils.data import get_data_filename

    host_guest = testsystems.HostGuestVacuum()
    # The test system's own System is read but not used; one is rebuilt from the force field below.
    vacuum_system = host_guest.system
    old_positions = host_guest.positions
    old_topology = host_guest.topology

    all_residues = list(old_topology.residues())
    guest_residue = all_residues[1]  # guest is second residue in topology
    current_mol = forcefield_generators.generateOEMolFromTopologyResidue(guest_residue)
    proposed_mol = smiles_to_oemol('C1CC2(CCC1(CC2)C)C')

    initial_smiles = oechem.OEMolToSmiles(current_mol)
    final_smiles = oechem.OEMolToSmiles(proposed_mol)

    # Build the old system with a GAFF template generator registered.
    template_gaff_path = get_data_filename("data/gaff.xml")
    forcefield = app.ForceField(template_gaff_path, 'tip3p.xml')
    forcefield.registerTemplateGenerator(forcefield_generators.gaffTemplateGenerator)
    solvated_system = forcefield.createSystem(old_topology, removeCMMotion=False)

    generator_gaff_path = get_data_filename('data/gaff.xml')
    generator_kwargs = {'removeCMMotion': False, 'nonbondedMethod': app.NoCutoff}
    system_generator = SystemGenerator(
        [generator_gaff_path, 'amber99sbildn.xml', 'tip3p.xml'],
        forcefield_kwargs=generator_kwargs)
    geometry_engine = geometry.FFAllAngleGeometryEngine()
    proposal_engine = SmallMoleculeSetProposalEngine(
        [initial_smiles, final_smiles], system_generator, residue_name=current_mol_name)

    # Generate the topology proposal.
    topology_proposal = proposal_engine.propose(
        solvated_system, old_topology, current_mol=current_mol, proposed_mol=proposed_mol)

    # Generate new positions with the geometry engine; its second return value is discarded.
    new_positions, _ = geometry_engine.propose(topology_proposal, old_positions, beta)

    return topology_proposal, old_positions, new_positions