def test_chemical_environments_matches_OE(self):
    """Exercise Topology.chemical_environment_matches with the OpenEye wrapper.

    A cyclohexane/ethanol box is loaded from PDB, with the reference
    molecules read from mol2 files, and three SMIRKS queries are checked:
    a tagged substructure, a fully explicit ethanol pattern, and a pattern
    that is expected to produce no hits.
    """
    from simtk.openmm import app

    oe_wrapper = OpenEyeToolkitWrapper()
    box_path = get_data_file_path(
        "systems/packmol_boxes/cyclohexane_ethanol_0.4_0.6.pdb")
    pdbfile = app.PDBFile(box_path)

    molecules = []
    for name in ("molecules/ethanol.mol2", "molecules/cyclohexane.mol2"):
        molecules.append(Molecule.from_file(get_data_file_path(name)))
    topology = Topology.from_openmm(pdbfile.topology,
                                    unique_molecules=molecules)

    # Tagged atom indices expected for the first hit of both matching queries.
    first_hit = (1728, 1729, 1730)

    # Substructure query
    hits = topology.chemical_environment_matches(
        "[C:1]-[C:2]-[O:3]", toolkit_registry=oe_wrapper)
    assert len(hits) == 143
    assert hits[0].topology_atom_indices == first_hit

    # Whole-molecule query: 143 * 12 (there are 12 possible hydrogen mappings)
    hits = topology.chemical_environment_matches(
        "[H][C:1]([H])([H])-[C:2]([H])([H])-[O:3][H]",
        toolkit_registry=oe_wrapper,
    )
    assert len(hits) == 1716
    assert hits[0].topology_atom_indices == first_hit

    # A substructure that is not present must yield no matches
    hits = topology.chemical_environment_matches(
        "[C][C:1]-[C:2]-[O:3]", toolkit_registry=oe_wrapper)
    assert len(hits) == 0
def setUp(self):
    """Create the molecule fixtures used by the tests in this class.

    Sets plain molecules built from SMILES (ethane, ethene, propane, charged
    methylamine), toluene read from SDF, toluene read from a charged mol2
    file (only when the OpenEye wrapper reports itself available), and
    ethane/propane variants that carry virtual sites.
    """

    def _alkane_with_vsites(smiles):
        """Build the molecule for ``smiles`` and attach two virtual sites.

        A bond-charge virtual site is added on the first two carbons, and a
        monovalent lone-pair virtual site on (first hydrogen bonded to
        carbon 0, carbon 0, carbon 1). Returns ``Molecule(molecule)``, i.e.
        a new Molecule constructed from the decorated one.
        """
        molecule = Molecule.from_smiles(smiles)
        carbons = [atom for atom in molecule.atoms if atom.atomic_number == 6]
        c0_hydrogens = [
            atom for atom in carbons[0].bonded_atoms
            if atom.atomic_number == 1
        ]
        molecule.add_bond_charge_virtual_site(
            (carbons[0], carbons[1]),
            0.1 * unit.angstrom,
            charge_increments=[0.1, 0.05] * unit.elementary_charge,
        )
        molecule.add_monovalent_lone_pair_virtual_site(
            (c0_hydrogens[0], carbons[0], carbons[1]),
            0.2 * unit.angstrom,
            20 * unit.degree,
            25 * unit.degree,
            charge_increments=[0.01, 0.02, 0.03] * unit.elementary_charge,
        )
        return Molecule(molecule)

    self.empty_molecule = Molecule()
    self.ethane_from_smiles = Molecule.from_smiles("CC")
    self.ethene_from_smiles = Molecule.from_smiles("C=C")
    self.propane_from_smiles = Molecule.from_smiles("CCC")

    filename = get_data_file_path("molecules/toluene.sdf")
    self.toluene_from_sdf = Molecule.from_file(filename)
    if OpenEyeToolkitWrapper.is_available():
        filename = get_data_file_path("molecules/toluene_charged.mol2")
        # TODO: This will require openeye to load
        self.toluene_from_charged_mol2 = Molecule.from_file(filename)
    self.charged_methylamine_from_smiles = Molecule.from_smiles(
        "[H]C([H])([H])[N+]([H])([H])[H]")

    # Ethane and propane with identical virtual-site decorations; built in
    # the same order as before (ethane first, then propane).
    self.ethane_from_smiles_w_vsites = _alkane_with_vsites("CC")
    self.propane_from_smiles_w_vsites = _alkane_with_vsites("CCC")
def test_from_openmm_duplicate_unique_mol(self):
    """Expect DuplicateUniqueMoleculeError when two of the supplied unique
    molecules are indistinguishable (ethanol and a reordered ethanol)."""
    from simtk.openmm import app

    pdbfile = app.PDBFile(
        get_data_file_path(
            'systems/packmol_boxes/cyclohexane_ethanol_0.4_0.6.pdb'))

    reference_files = (
        'molecules/ethanol.mol2',
        'molecules/ethanol_reordered.mol2',
        'molecules/cyclohexane.mol2',
    )
    molecules = []
    for name in reference_files:
        molecules.append(Molecule.from_file(get_data_file_path(name)))

    # Only the raised exception type matters; the result is not inspected.
    with self.assertRaises(DuplicateUniqueMoleculeError):
        Topology.from_openmm(pdbfile.topology, unique_molecules=molecules)
def test_to_file_vsites(self):
    """Check that ``Topology.to_file()`` doesn't write virtual sites.

    An ethanol molecule is given a bond-charge virtual site and written to a
    temporary PDB file; the output must contain exactly 9 HETATM records.
    """
    from tempfile import NamedTemporaryFile

    from openforcefield.topology import Molecule, Topology

    mol = Molecule.from_pdb_and_smiles(
        get_data_file_path("systems/test_systems/1_ethanol.pdb"), "CCO")
    carbons = [atom for atom in mol.atoms if atom.atomic_number == 6]
    positions = mol.conformers[0]
    mol.add_bond_charge_virtual_site(
        (carbons[0], carbons[1]),
        0.1 * unit.angstrom,
        charge_increments=[0.1, 0.05] * unit.elementary_charge,
    )
    topology = Topology()
    topology.add_molecule(mol)

    # The file should be printed out with 9 atoms and 0 virtual sites, so we
    # check that there are only 9 HETATM entries.
    with NamedTemporaryFile(suffix=".pdb") as iofile:
        topology.to_file(iofile.name, positions)
        # Read back through a context manager so the handle is closed
        # deterministically instead of being left to the garbage collector.
        with open(iofile.name) as pdb:
            count = sum(1 for line in pdb if line.startswith("HETATM"))
    assert count == 9
def test_chemical_environments_matches_RDK(self):
    """Exercise Topology.chemical_environment_matches with the RDKit wrapper.

    Same cyclohexane/ethanol box and the same three SMIRKS queries as the
    OpenEye variant of this test, with the reference molecules built from
    SMILES.
    """
    from simtk.openmm import app

    rdk_wrapper = RDKitToolkitWrapper()
    box_path = get_data_file_path(
        'systems/packmol_boxes/cyclohexane_ethanol_0.4_0.6.pdb')
    pdbfile = app.PDBFile(box_path)

    # Reference molecules come from SMILES here rather than from the mol2
    # files (presumably because the RDKit wrapper cannot read mol2 - confirm).
    molecules = [
        Molecule.from_smiles('CCO'),
        Molecule.from_smiles('C1CCCCC1'),
    ]
    topology = Topology.from_openmm(pdbfile.topology,
                                    unique_molecules=molecules)

    # Tagged atom indices expected for the first hit of both matching queries.
    first_hit = (1728, 1729, 1730)

    # Count CCO matches
    hits = topology.chemical_environment_matches(
        "[C:1]-[C:2]-[O:3]", toolkit_registry=rdk_wrapper)
    assert len(hits) == 143
    assert hits[0].topology_atom_indices == first_hit

    # Fully explicit ethanol: 143 * 12 (there are 12 possible hydrogen mappings)
    hits = topology.chemical_environment_matches(
        "[H][C:1]([H])([H])-[C:2]([H])([H])-[O:3][H]",
        toolkit_registry=rdk_wrapper)
    assert len(hits) == 1716
    assert hits[0].topology_atom_indices == first_hit

    # Search for a substructure that isn't there
    hits = topology.chemical_environment_matches(
        "[C][C:1]-[C:2]-[O:3]", toolkit_registry=rdk_wrapper)
    assert len(hits) == 0
def test_to_file_units_check(self):
    """Check that unitless, Angstrom and nanometer positions give the same PDB.

    The same ethanol topology is written three times, once per positions
    representation, and each time the coordinate field read from the first
    HETATM record must be the string "10.172".
    """
    from tempfile import NamedTemporaryFile

    from simtk.unit import nanometer

    from openforcefield.topology import Molecule, Topology

    topology = Topology()
    mol = Molecule.from_pdb_and_smiles(
        get_data_file_path("systems/test_systems/1_ethanol.pdb"), "CCO")
    topology.add_molecule(mol)
    positions_angstrom = mol.conformers[0]

    def _first_hetatm_coord(positions):
        """Write ``topology`` with ``positions`` to a temporary PDB and return
        the sixth-from-last whitespace-separated field of the first HETATM
        line (the X coordinate, per the checks below), or None when the file
        has no HETATM line."""
        with NamedTemporaryFile(suffix=".pdb") as iofile:
            topology.to_file(iofile.name, positions)
            # Context manager closes the read handle deterministically.
            with open(iofile.name) as pdb:
                for line in pdb:
                    if line.startswith("HETATM"):
                        return line.split()[-6]
        return None

    # Write the molecule to PDB and ensure that the X coordinate of the
    # first atom is 10.172
    assert _first_hetatm_coord(positions_angstrom) == "10.172"

    # Do the same check, but feed in equivalent positions measured in
    # nanometers and ensure the PDB is still the same
    positions_nanometer = positions_angstrom.in_units_of(nanometer)
    assert _first_hetatm_coord(positions_nanometer) == "10.172"

    # Finally feed in the bare values with no unit attached.
    # NOTE(review): relies on the private ``_value`` attribute of the
    # Quantity and assumes the conformer is stored in Angstrom - confirm.
    positions_unitless = positions_angstrom._value
    assert _first_hetatm_coord(positions_unitless) == "10.172"
def test_from_openmm_missing_reference(self):
    """Building a Topology from an OpenMM Topology must fail with ValueError
    when one of the molecules in the box has no matching unique molecule
    (only ethanol is supplied for the cyclohexane/ethanol box)."""
    from simtk.openmm import app

    box_path = get_data_file_path(
        'systems/packmol_boxes/cyclohexane_ethanol_0.4_0.6.pdb')
    pdbfile = app.PDBFile(box_path)
    molecules = [create_ethanol()]

    expected_error = pytest.raises(
        ValueError, match='No match found for molecule C6H12')
    with expected_error:
        Topology.from_openmm(pdbfile.topology, unique_molecules=molecules)
def test_from_openmm(self):
    """Build an openforcefield Topology from an OpenMM Topology plus its
    component molecules and check the reference/topology molecule counts."""
    from simtk.openmm import app

    box_path = get_data_file_path(
        'systems/packmol_boxes/cyclohexane_ethanol_0.4_0.6.pdb')
    pdbfile = app.PDBFile(box_path)

    ethanol = create_ethanol()
    cyclohexane = create_cyclohexane()
    topology = Topology.from_openmm(
        pdbfile.topology, unique_molecules=[ethanol, cyclohexane])

    # Two distinct species, 239 molecule instances in the box.
    assert topology.n_reference_molecules == 2
    assert topology.n_topology_molecules == 239
def test_to_file_multi_molecule_different_order(self):
    """Check that ``Topology.to_file`` keeps each molecule's own atom order.

    The topology holds an ethanol and a reversed-index ethanol. The last
    character of every HETATM line in the written PDB (the element symbol
    for these single-letter elements) must follow each molecule's own atom
    ordering.
    """
    from tempfile import NamedTemporaryFile

    from openforcefield.tests.test_forcefield import (
        create_ethanol,
        create_reversed_ethanol,
    )
    from openforcefield.topology import Molecule, Topology

    topology = Topology()
    topology.add_molecule(create_ethanol())
    topology.add_molecule(create_reversed_ethanol())
    mol = Molecule.from_pdb_and_smiles(
        get_data_file_path("systems/test_systems/1_ethanol.pdb"), "CCO")
    positions = mol.conformers[0]
    # Make up coordinates for the second ethanol by translating the first by
    # 10 angstroms (note that this will still be a gibberish conformation,
    # since the atom order in the second molecule is different)
    positions = np.concatenate(
        [positions, positions + 10.0 * unit.angstrom])

    with NamedTemporaryFile(suffix=".pdb") as iofile:
        topology.to_file(iofile.name, positions)
        # Context manager closes the read handle deterministically.
        with open(iofile.name) as pdb:
            element_order = [
                line.strip()[-1] for line in pdb
                if line.startswith("HETATM")
            ]

    assert element_order == [
        "C", "C", "O", "H", "H", "H", "H", "H", "H",
        "H", "H", "H", "H", "H", "H", "O", "C", "C",
    ]
def test_to_file_fileformat_invalid(self):
    """An unrecognised ``file_format`` passed to ``Topology.to_file`` must
    raise NotImplementedError."""
    from openforcefield.tests.test_forcefield import create_ethanol
    from openforcefield.topology import Molecule, Topology

    ethanol_pdb = get_data_file_path("systems/test_systems/1_ethanol.pdb")
    mol = Molecule.from_pdb_and_smiles(ethanol_pdb, "CCO")
    topology = Topology()
    topology.add_molecule(mol)

    expected_error = pytest.raises(NotImplementedError)
    with expected_error:
        topology.to_file(
            "ethanol_file.pdb", mol.conformers[0], file_format="AbC")
def test_from_openmm_missing_conect(self):
    """
    Creating an openforcefield Topology from an OpenMM Topology must fail
    with a ValueError carrying the CONECT hint when the origin PDB lacks
    CONECT records.
    """
    from simtk.openmm import app

    pdb_path = get_data_file_path(
        'systems/test_systems/1_ethanol_no_conect.pdb')
    pdbfile = app.PDBFile(pdb_path)
    molecules = [Molecule.from_smiles('CCO')]

    # Passed to pytest.raises as the ``match`` pattern.
    expected_message = ('No match found for molecule C. This would be a '
                        'very unusual molecule to try and parameterize, '
                        'and it is likely that the data source it was '
                        'read from does not contain connectivity '
                        'information. If this molecule is coming from '
                        'PDB, please ensure that the file contains CONECT '
                        'records.')
    with pytest.raises(ValueError, match=expected_message):
        Topology.from_openmm(pdbfile.topology, unique_molecules=molecules)
def test_to_file_fileformat_lettercase(self):
    """Check that the file format specifier is independent of letter case.

    Ethanol is written with ``file_format="pDb"``; the coordinate field read
    from the first HETATM record of the output must be the string "10.172".
    """
    from tempfile import NamedTemporaryFile

    from openforcefield.topology import Molecule, Topology

    topology = Topology()
    mol = Molecule.from_pdb_and_smiles(
        get_data_file_path("systems/test_systems/1_ethanol.pdb"), "CCO")
    topology.add_molecule(mol)
    positions = mol.conformers[0]

    with NamedTemporaryFile(suffix=".pdb") as iofile:
        topology.to_file(iofile.name, positions, file_format="pDb")
        # Context manager closes the read handle deterministically. The
        # None default makes a file without HETATM lines fail the assertion
        # below instead of raising NameError on an unbound variable.
        with open(iofile.name) as pdb:
            coord = next(
                (line.split()[-6] for line in pdb
                 if line.startswith("HETATM")),
                None,
            )
    assert coord == "10.172"
# datapath = './AlkEthOH_tripos/AlkEthOH_chain_filt1' # datapath = './AlkEthOH_tripos/AlkEthOH_rings_filt1' datapath = './AlkEthOH_tripos/AlkEthOH_test_filt1' molname = 'AlkEthOH_r22' mol_filepath = os.path.join(datapath, molname + '_tripos.mol2') prmtop_filepath = os.path.join(datapath, molname + '.top') inpcrd_filepath = os.path.join(datapath, molname + '.crd') # Check if we have this data file; if not we have to extract the archive. if not os.path.isdir(datapath): print("Extracting archived molecule files.") # Extract the AlkEthOH dataset shipped with the toolkit in data/molecules/ in the working directory. from openforcefield.tests.utils import get_data_file_path tarfile_path = os.path.join(get_data_file_path('molecules'), 'AlkEthOH_tripos.tar.gz') import tarfile with tarfile.open(tarfile_path, 'r:gz') as tar: tar.extractall() # Load molecule from openforcefield.topology import Molecule molecule = Molecule.from_file(mol_filepath) # Load forcefield from openforcefield.typing.engines.smirnoff import ForceField forcefield = ForceField('Frosst_AlkEthOH_parmAtFrosst.offxml')
versus energies from AMBER .prmtop and .crd files (parm@frosst params). """ import os import glob # datapath = './AlkEthOH_tripos/AlkEthOH_chain_filt1' # datapath = './AlkEthOH_tripos/AlkEthOH_rings_filt1' datapath = './AlkEthOH_tripos/AlkEthOH_test_filt1' # Check if we have this data file; if not we have to extract the archive. if not os.path.isdir(datapath): print("Extracting archived molecule files.") # Extract the AlkEthOH dataset shipped with the toolkit in data/molecules/ in the working directory. from openforcefield.tests.utils import get_data_file_path tarfile_path = os.path.join(get_data_file_path('molecules'), 'AlkEthOH_tripos.tar.gz') import tarfile with tarfile.open(tarfile_path, 'r:gz') as tar: tar.extractall() #Obtain list of molecules mol_filepaths = glob.glob(datapath+'/*tripos.mol2') mol_filepaths = [fnm for fnm in mol_filepaths if not 'c1302' in fnm] # Skip water. print('Found {} files to test'.format(len(mol_filepaths))) # Load forcefield from openforcefield.typing.engines.smirnoff import ForceField forcefield = ForceField('test_forcefields/Frosst_AlkEthOH_parmAtFrosst.offxml') from openforcefield.topology import Molecule