예제 #1
0
    def test_chemical_environments_matches_OE(self):
        """Check Topology.chemical_environment_matches with the OpenEye wrapper.

        A Topology is built from the cyclohexane/ethanol PDB box, then three
        SMIRKS queries are run against it: a substructure query, a
        whole-molecule query, and a query that is expected to match nothing.
        """
        from simtk.openmm import app

        toolkit_wrapper = OpenEyeToolkitWrapper()
        box_path = get_data_file_path(
            "systems/packmol_boxes/cyclohexane_ethanol_0.4_0.6.pdb")
        pdbfile = app.PDBFile(box_path)
        mol2_names = ("molecules/ethanol.mol2", "molecules/cyclohexane.mol2")
        molecules = [
            Molecule.from_file(get_data_file_path(mol2_name))
            for mol2_name in mol2_names
        ]
        topology = Topology.from_openmm(pdbfile.topology,
                                        unique_molecules=molecules)

        first_match_indices = (1728, 1729, 1730)
        # Each entry: (SMIRKS, expected number of matches, expected indices of
        # the first match, or None when no match is expected at all).
        expectations = (
            # Substructure match
            ("[C:1]-[C:2]-[O:3]", 143, first_match_indices),
            # Whole-molecule match: 143 * 12 (there are 12 possible hydrogen
            # mappings)
            ("[H][C:1]([H])([H])-[C:2]([H])([H])-[O:3][H]", 1716,
             first_match_indices),
            # A substructure that isn't there
            ("[C][C:1]-[C:2]-[O:3]", 0, None),
        )
        for smirks, expected_count, expected_first in expectations:
            matches = topology.chemical_environment_matches(
                smirks, toolkit_registry=toolkit_wrapper)
            assert len(matches) == expected_count
            if expected_first is not None:
                assert matches[0].topology_atom_indices == expected_first
예제 #2
0
    def setUp(self):
        """Create the molecules shared by the tests as instance attributes.

        Besides plain molecules built from SMILES and from files, two
        molecules (ethane and propane) are built that each carry a
        bond-charge virtual site and a monovalent lone-pair virtual site.
        """

        def _with_test_vsites(smiles):
            """Return a copy of the molecule for ``smiles`` with two virtual sites added."""
            base = Molecule.from_smiles(smiles)
            carbon_atoms = [a for a in base.atoms if a.atomic_number == 6]
            first_carbon = carbon_atoms[0]
            second_carbon = carbon_atoms[1]
            first_carbon_hydrogens = [
                a for a in first_carbon.bonded_atoms if a.atomic_number == 1
            ]
            base.add_bond_charge_virtual_site(
                (first_carbon, second_carbon),
                0.1 * unit.angstrom,
                charge_increments=[0.1, 0.05] * unit.elementary_charge,
            )
            base.add_monovalent_lone_pair_virtual_site(
                (first_carbon_hydrogens[0], first_carbon, second_carbon),
                0.2 * unit.angstrom,
                20 * unit.degree,
                25 * unit.degree,
                charge_increments=[0.01, 0.02, 0.03] * unit.elementary_charge,
            )
            return Molecule(base)

        self.empty_molecule = Molecule()
        self.ethane_from_smiles = Molecule.from_smiles("CC")
        self.ethene_from_smiles = Molecule.from_smiles("C=C")
        self.propane_from_smiles = Molecule.from_smiles("CCC")

        self.toluene_from_sdf = Molecule.from_file(
            get_data_file_path("molecules/toluene.sdf"))
        if OpenEyeToolkitWrapper.is_available():
            # TODO: This will require openeye to load
            self.toluene_from_charged_mol2 = Molecule.from_file(
                get_data_file_path("molecules/toluene_charged.mol2"))
        self.charged_methylamine_from_smiles = Molecule.from_smiles(
            "[H]C([H])([H])[N+]([H])([H])[H]")

        # Ethane and propane, each with the same pair of virtual sites
        self.ethane_from_smiles_w_vsites = _with_test_vsites("CC")
        self.propane_from_smiles_w_vsites = _with_test_vsites("CCC")
예제 #3
0
 def test_from_openmm_duplicate_unique_mol(self):
     """Check that a DuplicateUniqueMoleculeError is raised if we try to pass in two indistinguishably unique mols

     The unique-molecule list holds ethanol twice (the second copy read from
     a file with reordered atoms) plus cyclohexane.
     """
     from simtk.openmm import app
     pdbfile = app.PDBFile(
         get_data_file_path(
             'systems/packmol_boxes/cyclohexane_ethanol_0.4_0.6.pdb'))
     molecules = [
         Molecule.from_file(get_data_file_path(name))
         for name in ('molecules/ethanol.mol2',
                      'molecules/ethanol_reordered.mol2',
                      'molecules/cyclohexane.mol2')
     ]
     # Only the raised exception matters here, so neither the context manager
     # nor the return value is bound to a name.
     with self.assertRaises(DuplicateUniqueMoleculeError):
         Topology.from_openmm(pdbfile.topology, unique_molecules=molecules)
예제 #4
0
    def test_to_file_vsites(self):
        """
        Checks that Topology.to_file() doesn't write vsites

        An ethanol molecule is given a bond-charge virtual site, written to a
        temporary PDB file, and the HETATM records in that file are counted.
        """
        from tempfile import NamedTemporaryFile

        from openforcefield.topology import Molecule, Topology

        mol = Molecule.from_pdb_and_smiles(
            get_data_file_path("systems/test_systems/1_ethanol.pdb"), "CCO")
        carbons = [atom for atom in mol.atoms if atom.atomic_number == 6]
        positions = mol.conformers[0]
        mol.add_bond_charge_virtual_site(
            (carbons[0], carbons[1]),
            0.1 * unit.angstrom,
            charge_increments=[0.1, 0.05] * unit.elementary_charge,
        )
        topology = Topology()
        topology.add_molecule(mol)
        # The file should be printed out with 9 atoms and 0 virtual sites, so
        # we check that there are only 9 HETATM entries.
        with NamedTemporaryFile(suffix=".pdb") as iofile:
            topology.to_file(iofile.name, positions)
            # Re-open by name inside a context manager so the read handle is
            # closed before the temporary file is removed.
            with open(iofile.name) as pdb_handle:
                count = sum(
                    1 for line in pdb_handle if line.startswith("HETATM"))
        assert count == 9
예제 #5
0
 def test_chemical_environments_matches_RDK(self):
     """Check Topology.chemical_environment_matches with the RDKit wrapper.

     The reference molecules are built from SMILES, and three SMIRKS queries
     are run against the cyclohexane/ethanol box topology.
     """
     from simtk.openmm import app
     toolkit_wrapper = RDKitToolkitWrapper()
     box_path = get_data_file_path(
         'systems/packmol_boxes/cyclohexane_ethanol_0.4_0.6.pdb')
     pdbfile = app.PDBFile(box_path)
     molecules = [
         Molecule.from_smiles('CCO'),
         Molecule.from_smiles('C1CCCCC1'),
     ]
     topology = Topology.from_openmm(pdbfile.topology,
                                     unique_molecules=molecules)

     first_match_indices = (1728, 1729, 1730)
     # Each entry: (SMIRKS, expected number of matches, expected indices of
     # the first match, or None when no match is expected at all).
     expectations = (
         # Count CCO matches
         ("[C:1]-[C:2]-[O:3]", 143, first_match_indices),
         # 143 * 12 (there are 12 possible hydrogen mappings)
         ("[H][C:1]([H])([H])-[C:2]([H])([H])-[O:3][H]", 1716,
          first_match_indices),
         # Search for a substructure that isn't there
         ("[C][C:1]-[C:2]-[O:3]", 0, None),
     )
     for smirks, expected_count, expected_first in expectations:
         matches = topology.chemical_environment_matches(
             smirks, toolkit_registry=toolkit_wrapper)
         assert len(matches) == expected_count
         if expected_first is not None:
             assert matches[0].topology_atom_indices == expected_first
예제 #6
0
    def test_to_file_units_check(self):
        """
        Checks whether writing pdb with unitless positions, Angstrom positions,
        nanometer positions, result in the same output

        For each of the three position inputs the topology is written to a
        temporary PDB file and the X coordinate field of the first HETATM
        record is compared against "10.172".
        """
        from tempfile import NamedTemporaryFile

        from simtk.unit import nanometer

        from openforcefield.topology import Molecule, Topology

        topology = Topology()
        mol = Molecule.from_pdb_and_smiles(
            get_data_file_path("systems/test_systems/1_ethanol.pdb"), "CCO")
        topology.add_molecule(mol)
        positions_angstrom = mol.conformers[0]

        def _first_hetatm_x(positions):
            """Write the topology with ``positions`` and return the X field of the first HETATM line.

            Returns None when the written file has no HETATM line, so the
            caller's assertion fails cleanly instead of raising NameError.
            """
            with NamedTemporaryFile(suffix=".pdb") as iofile:
                topology.to_file(iofile.name, positions)
                # Context manager closes the read handle before the
                # temporary file is removed.
                with open(iofile.name) as pdb_handle:
                    for line in pdb_handle:
                        if line.startswith("HETATM"):
                            return line.split()[-6]
            return None

        # Write the molecule to PDB and ensure that the X coordinate of the
        # first atom is 10.172
        assert _first_hetatm_x(positions_angstrom) == "10.172"

        # Do the same check, but feed in equivalent positions measured in
        # nanometers and ensure the PDB is still the same
        positions_nanometer = positions_angstrom.in_units_of(nanometer)
        assert _first_hetatm_x(positions_nanometer) == "10.172"

        # And once more with the bare values, no unit attached.
        # NOTE(review): `_value` is a private attribute of the quantity;
        # presumably the raw numbers are in Angstrom here - confirm.
        positions_unitless = positions_angstrom._value
        assert _first_hetatm_x(positions_unitless) == "10.172"
예제 #7
0
    def test_from_openmm_missing_reference(self):
        """Test creation of an openforcefield Topology object from an OpenMM Topology when missing a unique molecule

        Only ethanol is supplied as a unique molecule, so Topology.from_openmm
        is expected to raise a ValueError naming the unmatched C6H12 molecule.
        """
        from simtk.openmm import app
        pdbfile = app.PDBFile(
            get_data_file_path(
                'systems/packmol_boxes/cyclohexane_ethanol_0.4_0.6.pdb'))

        molecules = [create_ethanol()]
        # Neither the exception info nor the return value is inspected, so
        # they are not bound to names.
        with pytest.raises(ValueError,
                           match='No match found for molecule C6H12'):
            Topology.from_openmm(pdbfile.topology, unique_molecules=molecules)
예제 #8
0
    def test_from_openmm(self):
        """Build an openforcefield Topology from an OpenMM Topology plus its component molecules.

        The cyclohexane/ethanol box is expected to yield 2 reference molecules
        and 239 topology molecules.
        """
        from simtk.openmm import app

        box_path = get_data_file_path(
            'systems/packmol_boxes/cyclohexane_ethanol_0.4_0.6.pdb')
        pdb_box = app.PDBFile(box_path)
        reference_molecules = [create_ethanol(), create_cyclohexane()]

        off_topology = Topology.from_openmm(
            pdb_box.topology, unique_molecules=reference_molecules)

        assert off_topology.n_reference_molecules == 2
        assert off_topology.n_topology_molecules == 239
예제 #9
0
    def test_to_file_multi_molecule_different_order(self):
        """
        Checks that Topology.to_file maintains the order of atoms
         for the same molecule with different indexing

        The topology holds an ethanol and a reversed ethanol; the last
        character of each HETATM line in the written PDB is compared against
        the expected element order.
        """
        from tempfile import NamedTemporaryFile

        from openforcefield.tests.test_forcefield import (
            create_ethanol,
            create_reversed_ethanol,
        )
        from openforcefield.topology import Molecule, Topology

        topology = Topology()
        topology.add_molecule(create_ethanol())
        topology.add_molecule(create_reversed_ethanol())
        mol = Molecule.from_pdb_and_smiles(
            get_data_file_path("systems/test_systems/1_ethanol.pdb"), "CCO")
        positions = mol.conformers[0]
        # Make up coordinates for the second ethanol by translating the first by 10 angstroms
        # (note that this will still be a gibberish conformation, since the atom order in the second molecule is different)
        positions = np.concatenate(
            [positions, positions + 10.0 * unit.angstrom])

        with NamedTemporaryFile(suffix=".pdb") as iofile:
            topology.to_file(iofile.name, positions)
            # Context manager closes the read handle before the temporary
            # file is removed.
            with open(iofile.name) as pdb_handle:
                element_order = [
                    line.strip()[-1] for line in pdb_handle
                    if line.startswith("HETATM")
                ]
        # First molecule: C, C, O, then 6 H. Second molecule: 6 H, then
        # O, C, C. The 12 hydrogens are therefore contiguous.
        expected_order = ["C", "C", "O"] + ["H"] * 12 + ["O", "C", "C"]
        assert element_order == expected_order
예제 #10
0
    def test_to_file_fileformat_invalid(self):
        """
        Verifies that Topology.to_file() raises NotImplementedError when
        given an unrecognized file_format
        """
        from openforcefield.tests.test_forcefield import create_ethanol
        from openforcefield.topology import Molecule, Topology

        ethanol = Molecule.from_pdb_and_smiles(
            get_data_file_path("systems/test_systems/1_ethanol.pdb"), "CCO")
        single_mol_topology = Topology()
        single_mol_topology.add_molecule(ethanol)
        coordinates = ethanol.conformers[0]
        with pytest.raises(NotImplementedError):
            single_mol_topology.to_file(
                "ethanol_file.pdb", coordinates, file_format="AbC")
예제 #11
0
    def test_from_openmm_missing_conect(self):
        """
        Test creation of an openforcefield Topology object from an OpenMM Topology
        when the origin PDB lacks CONECT records

        Topology.from_openmm is expected to raise a ValueError whose message
        points the user at the missing connectivity information.
        """
        from simtk.openmm import app
        pdbfile = app.PDBFile(
            get_data_file_path('systems/test_systems/1_ethanol_no_conect.pdb'))

        molecules = [Molecule.from_smiles('CCO')]
        # Neither the exception info nor the return value is inspected, so
        # they are not bound to names.
        with pytest.raises(
                ValueError,
                match='No match found for molecule C. This would be a '
                'very unusual molecule to try and parameterize, '
                'and it is likely that the data source it was '
                'read from does not contain connectivity '
                'information. If this molecule is coming from '
                'PDB, please ensure that the file contains CONECT '
                'records.'):
            Topology.from_openmm(pdbfile.topology, unique_molecules=molecules)
예제 #12
0
    def test_to_file_fileformat_lettercase(self):
        """
        Checks if fileformat specifier is independent of upper/lowercase

        The topology is written with file_format="pDb" and the X coordinate
        field of the first HETATM record is compared against "10.172".
        """
        from tempfile import NamedTemporaryFile

        from openforcefield.topology import Molecule, Topology

        topology = Topology()
        mol = Molecule.from_pdb_and_smiles(
            get_data_file_path("systems/test_systems/1_ethanol.pdb"), "CCO")
        topology.add_molecule(mol)
        positions = mol.conformers[0]
        # Start from None so that a file without any HETATM line fails the
        # assertion below instead of raising NameError.
        coord = None
        with NamedTemporaryFile(suffix=".pdb") as iofile:
            topology.to_file(iofile.name, positions, file_format="pDb")
            # Context manager closes the read handle before the temporary
            # file is removed.
            with open(iofile.name) as pdb_handle:
                for line in pdb_handle:
                    if line.startswith("HETATM"):
                        # Only the first HETATM record is of interest.
                        coord = line.split()[-6]
                        break
        assert coord == "10.172"
예제 #13
0
# Example script setup: locate the input files for one AlkEthOH molecule,
# extract the dataset archive if needed, then load the molecule and the
# force field.

# Alternative dataset directories; uncomment one to use it instead.
# datapath = './AlkEthOH_tripos/AlkEthOH_chain_filt1'
# datapath = './AlkEthOH_tripos/AlkEthOH_rings_filt1'
datapath = './AlkEthOH_tripos/AlkEthOH_test_filt1'

# Base name shared by the molecule's input files.
molname = 'AlkEthOH_r22'

# Paths built from the dataset directory and the molecule name.
# NOTE(review): prmtop_filepath and inpcrd_filepath are not used in the
# visible part of the script - presumably consumed further down; confirm.
mol_filepath = os.path.join(datapath, molname + '_tripos.mol2')
prmtop_filepath = os.path.join(datapath, molname + '.top')
inpcrd_filepath = os.path.join(datapath, molname + '.crd')

# Check whether the dataset directory exists; if not we have to extract the archive.
if not os.path.isdir(datapath):
    print("Extracting archived molecule files.")
    # Extract the AlkEthOH dataset shipped with the toolkit in data/molecules/ in the working directory.
    from openforcefield.tests.utils import get_data_file_path
    tarfile_path = os.path.join(get_data_file_path('molecules'),
                                'AlkEthOH_tripos.tar.gz')
    import tarfile
    with tarfile.open(tarfile_path, 'r:gz') as tar:
        # extractall() with no arguments unpacks into the current working directory.
        tar.extractall()

# Load molecule
from openforcefield.topology import Molecule

molecule = Molecule.from_file(mol_filepath)

# Load forcefield
from openforcefield.typing.engines.smirnoff import ForceField

forcefield = ForceField('Frosst_AlkEthOH_parmAtFrosst.offxml')
예제 #14
0
versus energies from AMBER .prmtop and .crd files (parm@frosst params).
"""

import os
import glob

# Example script setup: collect every AlkEthOH mol2 file in the chosen
# dataset directory (extracting the archive first if needed) and load the
# force field.

# Alternative dataset directories; uncomment one to use it instead.
# datapath = './AlkEthOH_tripos/AlkEthOH_chain_filt1'
# datapath = './AlkEthOH_tripos/AlkEthOH_rings_filt1'
datapath = './AlkEthOH_tripos/AlkEthOH_test_filt1'

# Check whether the dataset directory exists; if not we have to extract the archive.
if not os.path.isdir(datapath):
    print("Extracting archived molecule files.")
    # Extract the AlkEthOH dataset shipped with the toolkit in data/molecules/ in the working directory.
    from openforcefield.tests.utils import get_data_file_path
    tarfile_path = os.path.join(get_data_file_path('molecules'), 'AlkEthOH_tripos.tar.gz')
    import tarfile
    with tarfile.open(tarfile_path, 'r:gz') as tar:
        # extractall() with no arguments unpacks into the current working directory.
        tar.extractall()

# Obtain list of molecules
mol_filepaths = glob.glob(datapath+'/*tripos.mol2')
# Drop any path containing 'c1302' (the water entry, per the original note).
mol_filepaths = [fnm for fnm in mol_filepaths if 'c1302' not in fnm]  # Skip water.

print('Found {} files to test'.format(len(mol_filepaths)))

# Load forcefield
from openforcefield.typing.engines.smirnoff import ForceField
forcefield = ForceField('test_forcefields/Frosst_AlkEthOH_parmAtFrosst.offxml')

from openforcefield.topology import Molecule