def test_to_file_vsites(self):
    """
    Checks that Topology.to_file() doesn't write vsites

    A molecule loaded from the ethanol test PDB is given one bond-charge
    virtual site, the topology is written to a temporary PDB file, and the
    HETATM records in that file are counted.
    """
    from tempfile import NamedTemporaryFile

    from openforcefield.topology import Molecule, Topology

    mol = Molecule.from_pdb_and_smiles(
        get_data_file_path("systems/test_systems/1_ethanol.pdb"), "CCO"
    )
    carbons = [atom for atom in mol.atoms if atom.atomic_number == 6]
    positions = mol.conformers[0]
    mol.add_bond_charge_virtual_site(
        (carbons[0], carbons[1]),
        0.1 * unit.angstrom,
        charge_increments=[0.1, 0.05] * unit.elementary_charge,
    )
    topology = Topology()
    topology.add_molecule(mol)

    # The file should be written with 9 atoms and 0 virtual sites, so we
    # check that there are exactly 9 HETATM entries.
    with NamedTemporaryFile(suffix=".pdb") as iofile:
        topology.to_file(iofile.name, positions)
        # Read through a context manager so the handle is closed
        # deterministically rather than left to the garbage collector.
        with open(iofile.name) as pdb_file:
            count = sum(1 for line in pdb_file if line.startswith("HETATM"))
    assert count == 9
def test_get_virtual_site(self):
    """Test Topology.virtual_site function (get virtual site from index)"""
    top = Topology()
    top.add_molecule(self.ethane_from_smiles_w_vsites)
    assert top.n_topology_virtual_sites == 2
    top.add_molecule(self.propane_from_smiles_w_vsites)
    assert top.n_topology_virtual_sites == 4

    # With four virtual sites, indices -1 and 4 are expected to be rejected.
    for bad_index in (-1, 4):
        with self.assertRaises(Exception):
            top.virtual_site(bad_index)

    # Fetch every virtual site first, then compare their types in order.
    fetched = [top.virtual_site(vsite_index) for vsite_index in range(4)]
    expected_types = (
        "BondChargeVirtualSite",
        "MonovalentLonePairVirtualSite",
        "BondChargeVirtualSite",
        "MonovalentLonePairVirtualSite",
    )
    for vsite, expected_type in zip(fetched, expected_types):
        assert vsite.type == expected_type

    # Count every (topology atom, virtual-site atom) pair that compares equal.
    n_matches = sum(
        1
        for top_atom in top.topology_atoms
        for top_vsite in top.topology_virtual_sites
        for member_atom in top_vsite.atoms
        if top_atom == member_atom
    )
    # There are four virtual sites -- two BondCharges with 2 atoms, and two
    # MonovalentLonePairs with 3 atoms
    assert n_matches == 10
def test_topology_virtualsites_atom_indexing(self):
    """
    Add multiple instances of the same molecule, but in a different
    order, and ensure that virtualsite atoms are indexed correctly
    """
    top = Topology()
    for build_molecule in (create_ethanol, create_ethanol, create_reversed_ethanol):
        top.add_molecule(build_molecule())

    # Add a virtualsite to each reference molecule of the topology
    for reference in top.reference_molecules:
        reference._add_bond_charge_virtual_site([0, 1], 0.5 * unit.angstrom)

    # Expected topology particle indices of the atoms owning each virtual
    # site, in the order the virtual sites are iterated.
    expected_per_vsite = [(0, 1), (9, 10), (26, 25)]
    for vsite, expected in zip(top.topology_virtual_sites, expected_per_vsite):
        observed = tuple([atom.topology_particle_index for atom in vsite.atoms])
        assert observed == expected
        # Index-based lookup must agree with iteration over vsite.atoms.
        for position, wanted_index in enumerate(expected):
            assert vsite.atom(position).topology_particle_index == wanted_index
def getMolParamIDToAtomIndex(molecule, forcefield):
    """Map each parameter ID applied to a molecule to the atoms it is applied to.

    The molecule is placed in a fresh Topology, labelled with
    ``forcefield.label_molecules`` and the resulting labels are collated by
    parameter ID.

    Parameters
    ----------
    molecule : openforcefield.topology.Molecule
        Molecule to investigate
    forcefield : ForceField
        SMIRNOFF ForceField object (obtained from an ffxml via
        ForceField(ffxml)) containing FF of interest.

    Returns
    -------
    param_usage : dictionary
        Dictionary, keyed by parameter ID, where each entry is a tuple of
        ( smirks, [[atom1, ... atomN], [atom1, ... atomN]] ) giving the SMIRKS
        corresponding to that parameter ID and a list of the atom groups in
        that molecule that parameter is applied to.
    """
    topology = Topology()
    topology.add_molecule(molecule)
    # Use the ``forcefield`` argument; the previous ``ff.labal_molecules``
    # referenced an undefined name and a misspelled method.
    labels = forcefield.label_molecules(topology)

    param_usage = {}
    for molecule_labels in labels:
        for force_labels in molecule_labels.values():
            if hasattr(force_labels, "items"):
                # Mapping layout {atom_indices: parameter}, the same layout
                # get_molecule_parameterIDs reads via ``.items()`` and ``.id``.
                # NOTE(review): assumes parameter objects also expose
                # ``.smirks`` -- confirm against the ParameterType API.
                entries = (
                    (atom_indices, parameter.id, parameter.smirks)
                    for atom_indices, parameter in force_labels.items()
                )
            else:
                # Legacy layout: iterable of (atom_indices, pid, smirks).
                entries = force_labels
            for atom_indices, pid, smirks in entries:
                # The SMIRKS recorded for a pid is the one seen first.
                param_usage.setdefault(pid, (smirks, []))[1].append(atom_indices)

    return param_usage
def test_n_topology_atoms(self):
    """Test the n_topology_atoms and n_topology_bonds counters"""
    top = Topology()
    # A freshly created topology reports no atoms and no bonds.
    assert (top.n_topology_atoms, top.n_topology_bonds) == (0, 0)

    # After adding the ethane fixture: 8 atoms and 7 bonds.
    top.add_molecule(self.ethane_from_smiles)
    assert (top.n_topology_atoms, top.n_topology_bonds) == (8, 7)
def test_is_bonded(self):
    """Test Topology.assert_bonded on bonded and non-bonded atom index pairs"""
    topology = Topology()
    topology.add_molecule(self.propane_from_smiles_w_vsites)
    # The same pair is checked in both index orders.
    topology.assert_bonded(0, 1)
    topology.assert_bonded(1, 0)
    topology.assert_bonded(1, 2)
    # C-H bond
    topology.assert_bonded(0, 4)
    # The pair (0, 2) is expected to be rejected.
    with self.assertRaises(Exception):
        topology.assert_bonded(0, 2)
def test_to_file_units_check(self):
    """
    Checks whether writing pdb with unitless positions, Angstrom positions,
    nanometer positions, result in the same output
    """
    from tempfile import NamedTemporaryFile

    from simtk.unit import nanometer

    from openforcefield.topology import Molecule, Topology

    topology = Topology()
    mol = Molecule.from_pdb_and_smiles(
        get_data_file_path("systems/test_systems/1_ethanol.pdb"), "CCO"
    )
    topology.add_molecule(mol)
    positions_angstrom = mol.conformers[0]

    def first_hetatm_x(positions):
        """Write ``positions`` to a temporary PDB and return the X-coordinate
        field (sixth whitespace-separated field from the end) of the first
        HETATM record, or None when the file has no HETATM record."""
        with NamedTemporaryFile(suffix=".pdb") as iofile:
            topology.to_file(iofile.name, positions)
            # Context manager closes the handle deterministically.
            with open(iofile.name) as pdb_file:
                for line in pdb_file:
                    if line.startswith("HETATM"):
                        return line.split()[-6]
        return None

    # Write the molecule to PDB and ensure that the X coordinate of the
    # first atom is 10.172
    assert first_hetatm_x(positions_angstrom) == "10.172"

    # Do the same check, but feed in equivalent positions measured in
    # nanometers and ensure the PDB is still the same
    assert first_hetatm_x(positions_angstrom.in_units_of(nanometer)) == "10.172"

    # Finally feed in the raw, unitless values of the original positions
    assert first_hetatm_x(positions_angstrom._value) == "10.172"
def test_to_file_multi_molecule_different_order(self):
    """
    Checks that Topology.to_file maintains the order of atoms for the
    same molecule with different indexing
    """
    from tempfile import NamedTemporaryFile

    from openforcefield.tests.test_forcefield import (
        create_ethanol,
        create_reversed_ethanol,
    )
    from openforcefield.topology import Molecule, Topology

    topology = Topology()
    topology.add_molecule(create_ethanol())
    topology.add_molecule(create_reversed_ethanol())
    mol = Molecule.from_pdb_and_smiles(
        get_data_file_path("systems/test_systems/1_ethanol.pdb"), "CCO"
    )
    positions = mol.conformers[0]
    # Make up coordinates for the second ethanol by translating the first by
    # 10 angstroms (note that this will still be a gibberish conformation,
    # since the atom order in the second molecule is different)
    positions = np.concatenate([positions, positions + 10.0 * unit.angstrom])

    with NamedTemporaryFile(suffix=".pdb") as iofile:
        topology.to_file(iofile.name, positions)
        # Context manager closes the handle deterministically.
        with open(iofile.name) as pdb_file:
            # The last non-whitespace character of each HETATM record is
            # taken as the element symbol.
            element_order = [
                line.strip()[-1] for line in pdb_file if line.startswith("HETATM")
            ]

    assert element_order == [
        "C", "C", "O", "H", "H", "H", "H", "H", "H",
        "H", "H", "H", "H", "H", "H", "O", "C", "C",
    ]
def test_to_file_fileformat_invalid(self):
    """
    Checks that Topology.to_file raises NotImplementedError when given an
    unrecognised file_format ("AbC")
    """
    from openforcefield.topology import Molecule, Topology

    topology = Topology()
    mol = Molecule.from_pdb_and_smiles(
        get_data_file_path("systems/test_systems/1_ethanol.pdb"), "CCO"
    )
    topology.add_molecule(mol)
    positions = mol.conformers[0]
    fname = "ethanol_file.pdb"
    with pytest.raises(NotImplementedError):
        topology.to_file(fname, positions, file_format="AbC")
def test_get_atom(self):
    """Test Topology.atom function (atom lookup from index)"""
    topology = Topology()
    topology.add_molecule(self.ethane_from_smiles)

    # A negative index is expected to be rejected.
    with self.assertRaises(Exception):
        topology.atom(-1)

    # Make sure we get 2 carbons and 6 hydrogens
    atomic_numbers = [topology.atom(index).atomic_number for index in range(8)]
    assert atomic_numbers.count(6) == 2
    assert atomic_numbers.count(1) == 6

    # Index 8 is one past the last of the 8 atoms looked up above.
    with self.assertRaises(Exception):
        topology.atom(8)
def test_topology_particles_virtualsites_indexed_last(self):
    """
    Test to ensure that virtualsites are strictly indexed after all atoms
    in topology.particles
    """
    from openforcefield.topology import TopologyAtom, TopologyVirtualSite

    top = Topology()
    top.add_molecule(self.ethane_from_smiles_w_vsites)
    top.add_molecule(self.propane_from_smiles_w_vsites)

    # Walk all TopologyParticles: once the first non-atom particle has been
    # encountered, every later particle must be a TopologyVirtualSite.
    past_atoms = False
    for particle in top.topology_particles:
        if past_atoms:
            assert isinstance(particle, TopologyVirtualSite)
        elif not isinstance(particle, TopologyAtom):
            # First non-atom particle: it only flips the flag and is not
            # itself type-checked.
            past_atoms = True
def test_get_bond(self):
    """Test Topology.bond function (bond lookup from index)"""
    top = Topology()
    top.add_molecule(self.ethane_from_smiles)
    top.add_molecule(self.ethene_from_smiles)

    # A negative index is expected to be rejected.
    with self.assertRaises(Exception):
        top.bond(-1)

    single_total = 0
    double_total = 0
    ch_total = 0
    cc_total = 0
    for bond_index in range(12):  # 7 from ethane, 5 from ethene
        bond = top.bond(bond_index)

        # Tally by bond order.
        if bond.bond_order == 1:
            single_total += 1
        if bond.bond_order == 2:
            double_total += 1

        # Tally by the elements at the two ends of the bond.
        end_elements = [atom.atomic_number for atom in bond.atoms]
        carbons_in_bond = end_elements.count(6)
        hydrogens_in_bond = end_elements.count(1)
        if carbons_in_bond == 2:
            cc_total += 1
        if carbons_in_bond == 1 and hydrogens_in_bond == 1:
            ch_total += 1

    assert single_total == 11
    assert double_total == 1
    assert cc_total == 2
    assert ch_total == 10

    # Index 12 is one past the last of the 12 bonds looked up above.
    with self.assertRaises(Exception):
        top.bond(12)
def test_to_file_fileformat_lettercase(self):
    """
    Checks if fileformat specifier is independent of upper/lowercase
    """
    from tempfile import NamedTemporaryFile

    from openforcefield.topology import Molecule, Topology

    topology = Topology()
    mol = Molecule.from_pdb_and_smiles(
        get_data_file_path("systems/test_systems/1_ethanol.pdb"), "CCO"
    )
    topology.add_molecule(mol)
    positions = mol.conformers[0]

    with NamedTemporaryFile(suffix=".pdb") as iofile:
        # Mixed-case format specifier on purpose.
        topology.to_file(iofile.name, positions, file_format="pDb")
        # Context manager closes the handle deterministically.
        with open(iofile.name) as pdb_file:
            first_hetatm = next(
                (line for line in pdb_file if line.startswith("HETATM")), None
            )

    # Fail with a clear assertion (rather than a NameError) when no HETATM
    # record was written at all.
    assert first_hetatm is not None
    # Sixth whitespace-separated field from the end of the first HETATM
    # record must read 10.172.
    assert first_hetatm.split()[-6] == "10.172"
def test_impropers(self):
    """Topology.impropers should return the improper torsions of all topology molecules."""
    ethane = self.ethane_from_smiles
    propane = self.propane_from_smiles

    # Create topology: two copies of the first molecule, one of the second.
    top = Topology()
    for mol in (ethane, ethane, propane):
        top.add_molecule(mol)

    # The topology should have the correct number of impropers.
    found = list(top.impropers)
    assert len(found) == top.n_impropers
    assert top.n_impropers == 2 * ethane.n_impropers + propane.n_impropers

    def to_molecule_atoms(chunk):
        """Swap each topology atom for the ``_atom`` object it wraps."""
        return [tuple(a._atom for a in atoms) for atoms in chunk]

    # Check that the topology impropers are the correct ones. They are sliced
    # in the order the molecules were added.
    expected_ethane = list(ethane.impropers)
    expected_propane = list(propane.impropers)
    first_end = ethane.n_impropers
    second_end = 2 * ethane.n_impropers

    assert_tuple_of_atoms_equal(
        to_molecule_atoms(found[:first_end]), expected_ethane
    )
    assert_tuple_of_atoms_equal(
        to_molecule_atoms(found[first_end:second_end]), expected_ethane
    )
    assert_tuple_of_atoms_equal(
        to_molecule_atoms(found[second_end:]), expected_propane
    )
def get_molecule_parameterIDs(molecules, forcefield):
    """Process a list of molecules with a specified SMIRNOFF ffxml file and
    determine which parameters are used by which molecules, returning
    collated results.

    Parameters
    ----------
    molecules : list of openforcefield.topology.Molecule
        List of molecules (with explicit hydrogens) to parse
    forcefield : openforcefield.typing.engines.smirnoff.ForceField
        The ForceField to apply

    Returns
    -------
    parameters_by_molecule : dict
        Parameter IDs used in each molecule, keyed by the SMILES returned by
        each molecule's ``to_smiles()``. Each entry in the dict is a list
        which does not necessarily have unique entries; i.e. parameter IDs
        which are used more than once will occur multiple times.
    parameters_by_ID : dict
        Molecules in which each parameter ID occur, keyed by parameter ID.
        Each entry in the dict is a set of SMILES for molecules in which that
        parameter occurs. No frequency information is stored.

    Raises
    ------
    ValueError
        If two or more of the provided molecules yield the same SMILES.
    """
    from collections import Counter

    # Generate SMILES for each molecule, ensuring all molecules are unique
    isosmiles = [molecule.to_smiles() for molecule in molecules]
    duplicates = {
        smiles for smiles, n_seen in Counter(isosmiles).items() if n_seen > 1
    }
    if duplicates:
        raise ValueError(
            "Error: get_molecule_parameterIDs has been provided a list of oemols which contains some duplicates: {}"
            .format(duplicates))

    # Imported only after the input has been validated, so invalid input is
    # reported before any topology machinery is loaded.
    from openforcefield.topology import Topology

    # Assemble molecules into a Topology
    topology = Topology()
    for molecule in molecules:
        topology.add_molecule(molecule)

    # Label molecules
    labels = forcefield.label_molecules(topology)

    # Organize labels into output dictionaries by looping over all
    # molecules/smiles; labels are indexed in the same order as isosmiles.
    parameters_by_molecule = {}
    parameters_by_ID = {}
    for idx, smi in enumerate(isosmiles):
        pids_for_molecule = parameters_by_molecule[smi] = []
        for force_labels in labels[idx].values():
            for _atom_indices, parameter_type in force_labels.items():
                pid = parameter_type.id
                # Store pid to molecule (repeats are kept on purpose)
                pids_for_molecule.append(pid)
                # Store which molecule this pid occurred in
                parameters_by_ID.setdefault(pid, set()).add(smi)

    return parameters_by_molecule, parameters_by_ID