def test_chemical_environments_matches_RDK(self):
    """Exercise ``Topology.chemical_environment_matches`` via the RDKit wrapper.

    A cyclohexane/ethanol box is read from PDB and converted into a
    ``Topology`` using reference molecules built from SMILES. The topology
    is then queried with three SMARTS patterns: a heavy-atom-only pattern,
    a pattern that spells out every hydrogen, and a pattern that must not
    match anything.
    """
    from simtk.openmm import app

    rdkit_wrapper = RDKitToolkitWrapper()
    box_pdb = app.PDBFile(
        get_data_file_path(
            "systems/packmol_boxes/cyclohexane_ethanol_0.4_0.6.pdb"))

    # Reference molecules: ethanol first, then cyclohexane.
    reference_molecules = [
        Molecule.from_smiles(smiles) for smiles in ("CCO", "C1CCCCC1")
    ]
    topology = Topology.from_openmm(box_pdb.topology,
                                    unique_molecules=reference_molecules)

    def find(smarts):
        # Every query in this test goes through the same toolkit wrapper.
        return topology.chemical_environment_matches(
            smarts, toolkit_registry=rdkit_wrapper)

    expected_first_match = (1728, 1729, 1730)

    # Heavy-atom-only pattern.
    hits = find("[C:1]-[C:2]-[O:3]")
    assert len(hits) == 143
    assert hits[0].topology_atom_indices == expected_first_match

    # Pattern with explicit hydrogens.
    hits = find("[H][C:1]([H])([H])-[C:2]([H])([H])-[O:3][H]")
    # 143 * 12 (there are 12 possible hydrogen mappings)
    assert len(hits) == 1716
    assert hits[0].topology_atom_indices == expected_first_match

    # A substructure that is not present in the box.
    hits = find("[C][C:1]-[C:2]-[O:3]")
    assert len(hits) == 0
def serialise_system(self):
    """Parametrise ``self.molecule`` with smirnoff99Frosst and dump the system.

    Reads ``<molecule name>.pdb``, obtains a SMILES string for the same file
    through the ``RDKit`` helper (used to supply the connectivity), builds
    an OpenFF topology from the PDB topology, creates the OpenMM system and
    writes its XML serialisation to ``serialised.xml``.
    """
    pdb_name = f'{self.molecule.name}.pdb'

    # Coordinates / OpenMM topology come from the PDB file.
    pdb_file = app.PDBFile(pdb_name)

    # Connectivity comes from the SMILES string produced by the RDKit helper.
    smiles = RDKit().get_smiles(pdb_name)
    molecule = Molecule.from_smiles(smiles)

    off_topology = Topology.from_openmm(pdb_file.topology,
                                        unique_molecules=[molecule])

    # Load the smirnoff99Frosst force field and parametrise the topology.
    forcefield = ForceField('test_forcefields/smirnoff99Frosst.offxml')
    system = forcefield.create_openmm_system(off_topology)

    # Serialise the OpenMM system into the xml file.
    serialised = XmlSerializer.serializeSystem(system)
    with open('serialised.xml', 'w+') as out:
        out.write(serialised)
def test_chemical_environments_matches_OE(self):
    """Exercise ``Topology.chemical_environment_matches`` via the OpenEye wrapper.

    The cyclohexane/ethanol box is converted into a ``Topology`` using
    reference molecules read from mol2 files, then queried with a
    substructure pattern, a whole-molecule pattern and a pattern that must
    not match anything.
    """
    from simtk.openmm import app

    openeye_wrapper = OpenEyeToolkitWrapper()
    box_pdb = app.PDBFile(
        get_data_file_path(
            'systems/packmol_boxes/cyclohexane_ethanol_0.4_0.6.pdb'))

    mol2_names = ('molecules/ethanol.mol2', 'molecules/cyclohexane.mol2')
    reference_molecules = [
        Molecule.from_file(get_data_file_path(mol2_name))
        for mol2_name in mol2_names
    ]
    topology = Topology.from_openmm(box_pdb.topology,
                                    unique_molecules=reference_molecules)

    def find(smarts):
        # Every query in this test goes through the same toolkit wrapper.
        return topology.chemical_environment_matches(
            smarts, toolkit_registry=openeye_wrapper)

    def atom_indices(match):
        # Collapse a match into the tuple of its topology atom indices.
        return tuple(entry.topology_atom_index for entry in match)

    expected_first_match = (1728, 1729, 1730)

    # Substructure match.
    hits = find("[C:1]-[C:2]-[O:3]")
    assert len(hits) == 143
    assert atom_indices(hits[0]) == expected_first_match

    # Whole-molecule match.
    hits = find("[H][C:1]([H])([H])-[C:2]([H])([H])-[O:3][H]")
    # 143 * 12 (there are 12 possible hydrogen mappings)
    assert len(hits) == 1716
    assert atom_indices(hits[0]) == expected_first_match

    # A substructure that is not present in the box.
    hits = find("[C][C:1]-[C:2]-[O:3]")
    assert len(hits) == 0
def _execute(self, directory, available_resources):
    """Build an OpenMM system with a SMIRNOFF force field and write it to disk.

    Parameters
    ----------
    directory
        Directory in which ``system.xml`` is written.
    available_resources
        Not used by this method.

    Raises
    ------
    ValueError
        If the force field source is not a ``SmirnoffForceFieldSource``, or
        a component yields ``None`` when converted to a ``Molecule``.
    RuntimeError
        If the force field returns ``None`` instead of a system.
    """
    from openforcefield.topology import Molecule, Topology

    pdb_file = app.PDBFile(self.coordinate_file_path)

    force_field_source = ForceFieldSource.from_json(self.force_field_path)

    if not isinstance(force_field_source, SmirnoffForceFieldSource):
        raise ValueError(
            "Only SMIRNOFF force fields are supported by this protocol.")

    force_field = force_field_source.to_force_field()

    # Start from the known charged molecules (if requested), then add any
    # additional, user specified charged molecules.
    charged_molecules = []

    if self.apply_known_charges:
        charged_molecules = self._generate_known_charged_molecules()

    for mol2_path in self.charged_molecule_paths:
        charged_molecules.append(Molecule.from_file(mol2_path, "MOL2"))

    # One reference molecule per substance component.
    unique_molecules = []

    for component in self.substance.components:
        reference = Molecule.from_smiles(smiles=component.smiles)

        if reference is None:
            raise ValueError(
                f"{component} could not be converted to a Molecule")

        unique_molecules.append(reference)

    topology = Topology.from_openmm(pdb_file.topology,
                                    unique_molecules=unique_molecules)

    # Only pass ``charge_from_molecules`` when there is something to pass.
    extra_arguments = {}

    if len(charged_molecules) > 0:
        extra_arguments["charge_from_molecules"] = charged_molecules

    system = force_field.create_openmm_system(topology, **extra_arguments)

    if system is None:
        raise RuntimeError(
            "Failed to create a system from the specified topology and molecules."
        )

    system_xml = openmm.XmlSerializer.serialize(system)

    self.system_path = os.path.join(directory, "system.xml")

    with open(self.system_path, "w") as output_file:
        output_file.write(system_xml)
def test_to_from_openmm(self):
    """Round-trip a Topology OpenFF -> OpenMM -> OpenFF and compare the two."""
    from simtk.openmm.app import Aromatic

    # OpenFF topology with 1 ethanol and 2 benzenes.
    ethanol = Molecule.from_smiles("CCO")
    benzene = Molecule.from_smiles("c1ccccc1")
    original = Topology.from_molecules(molecules=[ethanol, benzene, benzene])

    # OpenFF -> OpenMM.
    omm_topology = original.to_openmm()

    # Bond orders must be preserved by the conversion.
    double_bond_count = sum(
        1 for omm_bond in omm_topology.bonds() if omm_bond.order == 2)
    aromatic_bond_count = sum(
        1 for omm_bond in omm_topology.bonds() if omm_bond.type is Aromatic)
    assert double_bond_count == 6
    assert aromatic_bond_count == 12

    # One residue (and one chain) for each molecule.
    assert omm_topology.getNumResidues() == 3
    assert omm_topology.getNumChains() == 3

    # OpenMM -> OpenFF.
    round_trip = Topology.from_openmm(omm_topology,
                                      unique_molecules=[ethanol, benzene])

    # Same reference molecules, in the same order.
    assert (original.n_reference_molecules ==
            round_trip.n_reference_molecules)
    for expected_mol, actual_mol in zip(original.reference_molecules,
                                        list(round_trip.reference_molecules)):
        assert expected_mol == actual_mol

    # Same number of topology molecules.
    assert (original.n_topology_molecules ==
            round_trip.n_topology_molecules)

    # Same atoms, compared by atomic number.
    assert original.n_topology_atoms == round_trip.n_topology_atoms
    for position, expected_atom in enumerate(original.topology_atoms):
        actual_atom = round_trip.atom(position)
        assert expected_atom.atomic_number == actual_atom.atomic_number

    # Same bonds: element pattern, bond order and aromaticity.
    for position, expected_bond in enumerate(original.topology_bonds):
        actual_bond = round_trip.bond(position)
        expected_elements = [a.atomic_number for a in expected_bond.atoms]
        actual_elements = [a.atomic_number for a in actual_bond.atoms]
        assert expected_elements == actual_elements
        assert expected_bond.bond_order == actual_bond.bond_order
        assert expected_bond.bond.is_aromatic == actual_bond.bond.is_aromatic
def make_off_system(mol, ID):
    """Create a parameterized OpenMM system for one aligned drug.

    Parameters
    ----------
    mol
        Molecule object passed to ``Chem.MolToSmiles`` to obtain the SMILES
        used for the OpenFF reference molecule.
    ID
        Identifier inserted into the PDB file name
        ``./processed_data/aligned_drugs/drug_<ID>.pdb``.

    Returns
    -------
    The system returned by ``ff.create_openmm_system`` for the drug topology.
    """
    # The OpenMM topology comes from the aligned PDB file for this drug.
    pdb_path = f'./processed_data/aligned_drugs/drug_{ID!s}.pdb'
    drug_pdbfile = PDBFile(pdb_path)

    # The reference molecule comes from the SMILES of ``mol``.
    smiles = Chem.MolToSmiles(mol)
    drug_mol = Molecule.from_smiles(smiles)

    off_topology = Topology.from_openmm(
        openmm_topology=drug_pdbfile.topology,
        unique_molecules=[drug_mol],
    )

    # Actual parameterizing step.
    return ff.create_openmm_system(off_topology)
def test_from_openmm(self):
    """Build an openforcefield Topology from an OpenMM Topology plus its
    component molecules and check the resulting molecule counts."""
    from simtk.openmm import app

    box_path = get_data_file_path(
        'systems/packmol_boxes/cyclohexane_ethanol_0.4_0.6.pdb')
    box_pdb = app.PDBFile(box_path)

    reference_molecules = [create_ethanol(), create_cyclohexane()]
    topology = Topology.from_openmm(box_pdb.topology,
                                    unique_molecules=reference_molecules)

    # Two reference molecule types, 239 molecule instances in the box.
    assert topology.n_reference_molecules == 2
    assert topology.n_topology_molecules == 239
def test_from_openmm_missing_reference(self):
    """Check that ``Topology.from_openmm`` raises a ``ValueError`` when a
    unique molecule present in the OpenMM Topology is not supplied."""
    from simtk.openmm import app

    box_path = get_data_file_path(
        'systems/packmol_boxes/cyclohexane_ethanol_0.4_0.6.pdb')
    box_pdb = app.PDBFile(box_path)

    # Only ethanol is provided; cyclohexane is deliberately left out.
    reference_molecules = [create_ethanol()]

    expected_message = 'No match found for molecule C6H12'
    with pytest.raises(ValueError, match=expected_message):
        Topology.from_openmm(box_pdb.topology,
                             unique_molecules=reference_molecules)
def test_from_openmm_duplicate_unique_mol(self):
    """Check that a ``DuplicateUniqueMoleculeError`` is raised when two
    indistinguishable unique molecules are passed to ``from_openmm``."""
    from simtk.openmm import app

    box_path = get_data_file_path(
        'systems/packmol_boxes/cyclohexane_ethanol_0.4_0.6.pdb')
    box_pdb = app.PDBFile(box_path)

    # Ethanol appears twice: once as-is and once with reordered atoms.
    mol2_names = (
        'molecules/ethanol.mol2',
        'molecules/ethanol_reordered.mol2',
        'molecules/cyclohexane.mol2',
    )
    reference_molecules = [
        Molecule.from_file(get_data_file_path(mol2_name))
        for mol2_name in mol2_names
    ]

    with self.assertRaises(DuplicateUniqueMoleculeError):
        Topology.from_openmm(box_pdb.topology,
                             unique_molecules=reference_molecules)
def test_from_openmm_missing_conect(self):
    """Check the error raised by ``Topology.from_openmm`` when the origin
    PDB lacks CONECT records."""
    from simtk.openmm import app

    pdb_path = get_data_file_path(
        'systems/test_systems/1_ethanol_no_conect.pdb')
    ethanol_pdb = app.PDBFile(pdb_path)

    reference_molecules = [Molecule.from_smiles('CCO')]

    # ``pytest.raises`` treats this text as a regular expression.
    expected_message = ('No match found for molecule C. This would be a '
                        'very unusual molecule to try and parameterize, '
                        'and it is likely that the data source it was '
                        'read from does not contain connectivity '
                        'information. If this molecule is coming from '
                        'PDB, please ensure that the file contains CONECT '
                        'records.')

    with pytest.raises(ValueError, match=expected_message):
        Topology.from_openmm(ethanol_pdb.topology,
                             unique_molecules=reference_molecules)
def execute(self, directory, available_resources):
    """Evaluate reference, reverse- and forward-perturbed reduced potentials.

    Parameters
    ----------
    directory
        Directory in which the ``reference_<i>.csv``, ``reverse.csv`` and
        ``forward.csv`` files are written.
    available_resources
        Forwarded to ``_evaluate_reduced_potential``.

    Returns
    -------
    The result of ``_get_output_dictionary()`` on success, or a
    ``PropertyEstimatorException`` instance (returned, not raised) when the
    inputs are inconsistent or a force field source is not SMIRNOFF.
    """
    import mdtraj

    from openforcefield.topology import Molecule, Topology

    logging.info(f'Calculating the reduced gradient potentials for {self.parameter_key}: {self._id}')

    # --- Input validation -------------------------------------------------
    if len(self.reference_force_field_paths) != 1 and self.use_subset_of_force_field:

        return PropertyEstimatorException(directory, 'A single reference force field must be '
                                                     'provided when calculating the reduced '
                                                     'potentials using a subset of the full force')

    if len(self.reference_statistics_path) <= 0 and self.use_subset_of_force_field:

        return PropertyEstimatorException(directory, 'The path to the statistics evaluated using '
                                                     'the full force field must be provided.')

    with open(self.force_field_path) as source_file:
        target_source = ForceFieldSource.parse_json(source_file.read())

    if not isinstance(target_source, SmirnoffForceFieldSource):

        return PropertyEstimatorException(directory, 'Only SMIRNOFF force fields are supported by '
                                                     'this protocol.')

    # --- Load the inputs --------------------------------------------------
    target_force_field = target_source.to_force_field()

    trajectory = mdtraj.load_dcd(self.trajectory_file_path,
                                 self.coordinate_file_path)

    unique_molecules = [
        Molecule.from_smiles(smiles=component.smiles)
        for component in self.substance.components
    ]

    pdb_file = app.PDBFile(self.coordinate_file_path)
    topology = Topology.from_openmm(pdb_file.topology,
                                    unique_molecules=unique_molecules)

    # When only a subset of the system object is used, the reference
    # statistics containing the full system energies are loaded so that the
    # forward and reverse potential energies can be corrected.
    subset_energy_corrections = None

    if self.use_subset_of_force_field:
        reference_statistics = StatisticsArray.from_pandas_csv(self.reference_statistics_path)
    else:
        reference_statistics = None

    # --- Reference potentials ---------------------------------------------
    # Compute the reduced reference energy for each reference force field
    # file that has been provided.
    self.reference_potential_paths = []

    for index, reference_path in enumerate(self.reference_force_field_paths):

        with open(reference_path) as source_file:
            reference_source = ForceFieldSource.parse_json(source_file.read())

        if not isinstance(reference_source, SmirnoffForceFieldSource):

            return PropertyEstimatorException(directory, 'Only SMIRNOFF force fields are supported by '
                                                         'this protocol.')

        reference_force_field = reference_source.to_force_field()
        reference_system, _ = self._build_reduced_system(reference_force_field, topology)

        reference_potentials_path = path.join(directory, f'reference_{index}.csv')

        self._evaluate_reduced_potential(reference_system,
                                         trajectory,
                                         reference_potentials_path,
                                         available_resources)

        self.reference_potential_paths.append(reference_potentials_path)

        if reference_statistics is None:
            continue

        # Record the full-minus-subset energy difference, then overwrite the
        # stored subset energies with the full system energies.
        subset_energies = StatisticsArray.from_pandas_csv(reference_potentials_path)
        subset_energy_corrections = (reference_statistics[ObservableType.PotentialEnergy] -
                                     subset_energies[ObservableType.PotentialEnergy])

        subset_energies[ObservableType.PotentialEnergy] = reference_statistics[ObservableType.PotentialEnergy]
        subset_energies.to_pandas_csv(reference_potentials_path)

    # --- Perturbed systems ------------------------------------------------
    reverse_system, reverse_value = self._build_reduced_system(target_force_field,
                                                               topology,
                                                               -self.perturbation_scale)

    forward_system, forward_value = self._build_reduced_system(target_force_field,
                                                               topology,
                                                               self.perturbation_scale)

    self.reverse_parameter_value = openmm_quantity_to_pint(reverse_value)
    self.forward_parameter_value = openmm_quantity_to_pint(forward_value)

    # --- Reduced potentials of the perturbed systems ----------------------
    self.reverse_potentials_path = path.join(directory, 'reverse.csv')
    self.forward_potentials_path = path.join(directory, 'forward.csv')

    self._evaluate_reduced_potential(reverse_system,
                                     trajectory,
                                     self.reverse_potentials_path,
                                     available_resources,
                                     subset_energy_corrections)

    self._evaluate_reduced_potential(forward_system,
                                     trajectory,
                                     self.forward_potentials_path,
                                     available_resources,
                                     subset_energy_corrections)

    logging.info(f'Finished calculating the reduced gradient potentials.')

    return self._get_output_dictionary()
def test_component_combination():
    """Test that a system still yields the same energy after rebuilding it
    out of its components.

    There have been issues where subsequent instances of a molecule might
    have zero charges. This test tries to catch that (and also explicitly
    checks the charges) by re-building a system out of its components.
    """
    from simtk import openmm

    from .utils import compare_system_energies, get_packmol_pdb_file_path

    # Create an OpenMM System for a cyclohexane-ethanol mixture.
    offxml_path = utils.get_data_file_path(
        'test_forcefields/Frosst_AlkEthOH.offxml')
    forcefield = ForceField(offxml_path)

    pdbfile = openmm.app.PDBFile(
        get_packmol_pdb_file_path('cyclohexane_ethanol_0.4_0.6'))

    monomer_paths = [
        utils.get_data_file_path(
            os.path.join('systems', 'monomers', monomer + '.sdf'))
        for monomer in ('ethanol', 'cyclohexane')
    ]
    molecules = [Molecule.from_file(monomer_path)
                 for monomer_path in monomer_paths]

    topology = Topology.from_openmm(pdbfile.topology,
                                    unique_molecules=molecules)
    system = forcefield.create_openmm_system(topology)

    # Convert System to a ParmEd Structure.
    structure = parmed.openmm.topsystem.load_topology(topology.to_openmm(),
                                                      system,
                                                      pdbfile.positions)

    # Split the Structure into components; each entry pairs a component
    # structure with a collection whose length is its number of copies.
    components = structure.split()
    strs = [component for component, _ in components]
    nums = [len(instances) for _, instances in components]

    # Re-compose Structure from components.
    new_structure = strs[0] * nums[0]
    for component, copies in zip(strs[1:], nums[1:]):
        new_structure += component * copies

    # Swap in coordinates again.
    new_structure.positions = structure.positions

    # Create System.
    newsys = new_structure.createSystem(nonbondedMethod=openmm.app.NoCutoff,
                                        constraints=None,
                                        implicitSolvent=None)

    # Cross check energies.
    groups0, groups1, energy0, energy1 = compare_system_energies(
        pdbfile.topology,
        pdbfile.topology,
        system,
        newsys,
        pdbfile.positions,
        verbose=False)

    # Also check that the number of components is equal to the expected one.
    if len(nums) != 2:
        print("Error: Test system has incorrect number of components.")
        raise Exception(
            'Incorrect number of components in cyclohexane/ethanol test system.'
        )

    # Also check that none of the residues have zero charge.
    for resnr, residue in enumerate(structure.residues):
        abscharges = [abs(atom.charge) for atom in residue.atoms]
        if sum(abscharges) == 0:
            raise Exception(
                'Error: Residue %s in cyclohexane-ethanol test system has a charge of zero, which is incorrect.'
                % resnr)
def _execute(self, directory, available_resources):
    """Assemble a full OpenMM system from per-component template systems.

    Each unique component is parameterized once (water SMILES go through
    ``_build_tip3p_system``, everything else through
    ``_parameterize_molecule`` inside a per-component sub-directory). The
    resulting templates are appended, molecule by molecule, to an empty
    system, which is finally serialized to ``system.xml`` in ``directory``.

    Parameters
    ----------
    directory
        Directory for the per-component sub-directories and ``system.xml``.
    available_resources
        Not used by this method.
    """
    from openforcefield.topology import Molecule, Topology

    force_field_source = ForceFieldSource.from_json(self.force_field_path)
    cutoff = pint_quantity_to_openmm(force_field_source.cutoff)

    # Load in the systems topology.
    openmm_pdb_file = app.PDBFile(self.coordinate_file_path)
    openmm_topology = openmm_pdb_file.topology

    # Reference molecules keyed by the SMILES the toolkit generates for
    # them, so components that map to the same SMILES collapse to one entry.
    unique_molecules = {}

    for component in self.substance:
        reference = Molecule.from_smiles(component.smiles)
        unique_molecules[reference.to_smiles()] = reference

    # Parameterize each component in the system.
    water_smiles = ("O", "[H]O[H]", "[H][O][H]")
    system_templates = {}

    for index, (smiles, reference) in enumerate(unique_molecules.items()):

        if smiles in water_smiles:

            system_templates[smiles] = self._build_tip3p_system(
                cutoff,
                openmm_topology.getUnitCellDimensions(),
            )
            continue

        component_directory = os.path.join(directory, str(index))
        os.makedirs(component_directory, exist_ok=True)

        with temporarily_change_directory(component_directory):
            system_templates[smiles] = self._parameterize_molecule(
                reference, force_field_source, cutoff)

    # Create an OFF topology for better insight into the layout of the
    # system topology.
    topology = Topology.from_openmm(openmm_topology,
                                    unique_molecules.values())

    # Create the full system object from the component templates.
    system = self._create_empty_system(cutoff)

    for topology_molecule in topology.topology_molecules:

        template = system_templates[
            topology_molecule.reference_molecule.to_smiles()]

        # Maps each atom's index within its reference molecule to its
        # position in this topology molecule.
        index_map = {
            topology_atom.atom.molecule_particle_index: position
            for position, topology_atom in enumerate(topology_molecule.atoms)
        }

        # Append the component template to the full system.
        self._append_system(system, template, index_map)

    box_vectors = openmm_topology.getPeriodicBoxVectors()

    if box_vectors is not None:
        system.setDefaultPeriodicBoxVectors(*box_vectors)

    # Serialize the system object.
    self.system_path = os.path.join(directory, "system.xml")

    with open(self.system_path, "w") as output_file:
        output_file.write(openmm.XmlSerializer.serialize(system))
def _execute(self, directory, available_resources):
    """Evaluate the reverse- and forward-perturbed reduced potentials.

    Parameters
    ----------
    directory
        Directory in which ``subset.csv`` (only when a subset of the force
        field is used), ``reverse.csv`` and ``forward.csv`` are written.
    available_resources
        Forwarded to ``_evaluate_reduced_potential``.

    Raises
    ------
    ValueError
        If the force field source is not a ``SmirnoffForceFieldSource``.
    """
    import mdtraj

    from openforcefield.topology import Molecule, Topology

    with open(self.force_field_path) as source_file:
        force_field_source = ForceFieldSource.parse_json(source_file.read())

    if not isinstance(force_field_source, SmirnoffForceFieldSource):
        raise ValueError(
            "Only SMIRNOFF force fields are supported by this protocol.",
        )

    # Load in the inputs.
    force_field = force_field_source.to_force_field()

    trajectory = mdtraj.load_dcd(self.trajectory_file_path,
                                 self.coordinate_file_path)

    unique_molecules = [
        Molecule.from_smiles(smiles=component.smiles)
        for component in self.substance.components
    ]

    pdb_file = app.PDBFile(self.coordinate_file_path)
    topology = Topology.from_openmm(pdb_file.topology,
                                    unique_molecules=unique_molecules)

    # Compute the difference between the energies using the reduced force
    # field, and the full force field.
    energy_corrections = None

    if self.use_subset_of_force_field:

        subset_system, _ = self._build_reduced_system(force_field, topology)

        subset_path = os.path.join(directory, f"subset.csv")
        subset_potentials = self._evaluate_reduced_potential(
            subset_system, trajectory, subset_path, available_resources)

        full_statistics = StatisticsArray.from_pandas_csv(
            self.statistics_path)

        energy_corrections = (
            full_statistics[ObservableType.PotentialEnergy] -
            subset_potentials[ObservableType.PotentialEnergy])

    # Build the slightly perturbed systems.
    reverse_system, reverse_value = self._build_reduced_system(
        force_field, topology, -self.perturbation_scale)
    forward_system, forward_value = self._build_reduced_system(
        force_field, topology, self.perturbation_scale)

    self.reverse_parameter_value = openmm_quantity_to_pint(reverse_value)
    self.forward_parameter_value = openmm_quantity_to_pint(forward_value)

    # Calculate the reduced potentials.
    self.reverse_potentials_path = os.path.join(directory, "reverse.csv")
    self.forward_potentials_path = os.path.join(directory, "forward.csv")

    self._evaluate_reduced_potential(
        reverse_system,
        trajectory,
        self.reverse_potentials_path,
        available_resources,
        energy_corrections,
    )
    self._evaluate_reduced_potential(
        forward_system,
        trajectory,
        self.forward_potentials_path,
        available_resources,
        energy_corrections,
    )
def execute(self, directory, available_resources):
    """Generate an OpenMM system for the substance using tleap.

    Each distinct molecule type found in the topology is parameterized once
    in its own sub-directory of ``directory`` (water SMILES go through
    ``_build_tip3p_system``, everything else through ``_run_tleap``). The
    per-type template systems are then concatenated in topology order and
    the result is serialized to ``system.xml``.

    Parameters
    ----------
    directory
        Directory for the per-component sub-directories and ``system.xml``.
    available_resources
        Not used by this method.

    Returns
    -------
    The result of ``_get_output_dictionary()`` on success. A
    ``PropertyEstimatorException`` is returned (not raised) when the force
    field source is not a ``TLeapForceFieldSource``; any error object
    reported by ``_run_tleap`` is returned as-is.
    """
    from openforcefield.topology import Molecule, Topology

    logging.info(
        f'Generating a system with tleap for {self.substance.identifier}: {self._id}'
    )

    with open(self.force_field_path) as file:
        force_field_source = ForceFieldSource.parse_json(file.read())

    if not isinstance(force_field_source, TLeapForceFieldSource):

        return PropertyEstimatorException(
            directory=directory,
            message='Only TLeap force field sources are supported by this '
            'protocol.')

    # Load in the systems coordinates / topology.
    openmm_pdb_file = app.PDBFile(self.coordinate_file_path)

    # Create an OFF topology for better insight into the layout of the
    # system topology.
    unique_molecules = [
        Molecule.from_smiles(component.smiles)
        for component in self.substance.components
    ]

    topology = Topology.from_openmm(openmm_pdb_file.topology,
                                    unique_molecules)

    # Keep one topology molecule per SMILES (the last one seen) so that the
    # atom ordering used for parameterization matches the topology.
    topology_molecules = dict()

    for topology_molecule in topology.topology_molecules:
        topology_molecules[
            topology_molecule.reference_molecule.to_smiles()] = topology_molecule

    system_templates = {}

    cutoff = pint_quantity_to_openmm(force_field_source.cutoff)

    for index, (smiles, topology_molecule) in enumerate(
            topology_molecules.items()):

        # Start every component from a clean directory.
        component_directory = os.path.join(directory, str(index))

        if os.path.isdir(component_directory):
            shutil.rmtree(component_directory)

        os.makedirs(component_directory, exist_ok=True)

        if smiles != 'O' and smiles != '[H]O[H]':

            initial_mol2_name = 'initial.mol2'
            initial_mol2_path = os.path.join(component_directory,
                                             initial_mol2_name)

            self._topology_molecule_to_mol2(topology_molecule,
                                            initial_mol2_path,
                                            self.charge_backend)

            prmtop_path, _, error = self._run_tleap(force_field_source,
                                                    initial_mol2_name,
                                                    component_directory)

            if error is not None:
                return error

            prmtop_file = openmm.app.AmberPrmtopFile(prmtop_path)

            component_system = prmtop_file.createSystem(
                nonbondedMethod=app.PME,
                nonbondedCutoff=cutoff,
                constraints=app.HBonds,
                rigidWater=True,
                removeCMMotion=False)

            if openmm_pdb_file.topology.getPeriodicBoxVectors() is not None:
                component_system.setDefaultPeriodicBoxVectors(
                    *openmm_pdb_file.topology.getPeriodicBoxVectors())

        else:

            component_system = self._build_tip3p_system(
                topology_molecule, cutoff,
                openmm_pdb_file.topology.getUnitCellDimensions())

        # Key the template by the SMILES of the molecule that was actually
        # parameterized. ``index`` counts the molecule types in the order
        # they first occur in the topology, which is not guaranteed to be
        # the order of ``unique_molecules``; indexing that list with it
        # could register a system under another component's SMILES.
        system_templates[smiles] = component_system

        with open(os.path.join(component_directory, 'component.xml'),
                  'w') as file:
            file.write(openmm.XmlSerializer.serialize(component_system))

    # Create the full system object from the component templates.
    system = None

    for topology_molecule in topology.topology_molecules:

        system_template = system_templates[
            topology_molecule.reference_molecule.to_smiles()]

        if system is None:

            # If no system has been set up yet, just use the first template.
            system = copy.deepcopy(system_template)
            continue

        # Append the component template to the full system.
        self._append_system(system, system_template)

    # Serialize the system object.
    system_xml = openmm.XmlSerializer.serialize(system)

    self.system_path = os.path.join(directory, 'system.xml')

    with open(self.system_path, 'w') as file:
        file.write(system_xml)

    logging.info(f'System generated: {self.id}')

    return self._get_output_dictionary()
def execute(self, directory, available_resources):
    """Build an OpenMM system with a SMIRNOFF force field and write it to disk.

    Parameters
    ----------
    directory
        Directory in which ``system.xml`` is written.
    available_resources
        Not used by this method.

    Returns
    -------
    The result of ``_get_output_dictionary()`` on success, or a
    ``PropertyEstimatorException`` instance (returned, not raised) when the
    force field source cannot be loaded or is not SMIRNOFF, a component
    yields ``None`` when converted to a ``Molecule``, or the force field
    returns ``None`` instead of a system.
    """
    from openforcefield.topology import Molecule, Topology

    logging.info('Generating topology: ' + self.id)

    pdb_file = app.PDBFile(self.coordinate_file_path)

    try:

        with open(self.force_field_path) as file:
            force_field_source = ForceFieldSource.parse_json(file.read())

    except Exception as e:

        # Any failure to read or parse the source is reported to the
        # caller as a returned exception object rather than propagated.
        return PropertyEstimatorException(
            directory=directory,
            message='{} could not load the ForceFieldSource: {}'.format(
                self.id, e))

    if not isinstance(force_field_source, SmirnoffForceFieldSource):

        return PropertyEstimatorException(
            directory=directory,
            message='Only SMIRNOFF force fields are supported by this '
            'protocol.')

    force_field = force_field_source.to_force_field()

    unique_molecules = []
    charged_molecules = []

    if self.apply_known_charges:
        charged_molecules = self._generate_known_charged_molecules()

    # Load in any additional, user specified charged molecules.
    for charged_molecule_path in self.charged_molecule_paths:

        charged_molecule = Molecule.from_file(charged_molecule_path, 'MOL2')
        charged_molecules.append(charged_molecule)

    for component in self.substance.components:

        molecule = Molecule.from_smiles(smiles=component.smiles)

        if molecule is None:

            return PropertyEstimatorException(
                directory=directory,
                message='{} could not be converted to a Molecule'.format(
                    component))

        unique_molecules.append(molecule)

    topology = Topology.from_openmm(pdb_file.topology,
                                    unique_molecules=unique_molecules)

    # Only pass ``charge_from_molecules`` when there is something to pass.
    if len(charged_molecules) > 0:
        system = force_field.create_openmm_system(
            topology, charge_from_molecules=charged_molecules)
    else:
        system = force_field.create_openmm_system(topology)

    if system is None:

        # The two literals previously joined without a separating space
        # ("from theprovided").
        return PropertyEstimatorException(
            directory=directory,
            message='Failed to create a system from the '
            'provided topology and molecules')

    from simtk.openmm import XmlSerializer
    system_xml = XmlSerializer.serialize(system)

    self.system_path = os.path.join(directory, 'system.xml')

    # Written in binary mode as explicitly UTF-8 encoded bytes.
    with open(self.system_path, 'wb') as file:
        file.write(system_xml.encode('utf-8'))

    logging.info('Topology generated: ' + self.id)

    return self._get_output_dictionary()
def execute(self, directory, available_resources):
    """Generate an OpenMM system for the substance using LigParGen.

    For each substance component, one matching topology molecule is located
    and a template system is built (water SMILES go through
    ``_build_tip3p_system``, everything else through a force field file
    obtained from ``_parameterize_smiles``). The templates are concatenated
    in topology order, the OPLS mixing rules are applied and the system is
    serialized to ``system.xml``.

    Parameters
    ----------
    directory
        Directory in which ``system.xml`` is written; also forwarded to
        ``_parameterize_smiles``.
    available_resources
        Not used by this method.

    Returns
    -------
    The result of ``_get_output_dictionary()`` on success, or a
    ``PropertyEstimatorException`` instance (returned, not raised) when the
    force field source is not a ``LigParGenForceFieldSource`` or a
    component has no matching molecule in the topology.
    """
    import mdtraj

    from openforcefield.topology import Molecule, Topology

    logging.info(
        f'Generating a system with LigParGen for {self.substance.identifier}: {self._id}'
    )

    with open(self.force_field_path) as file:
        force_field_source = ForceFieldSource.parse_json(file.read())

    if not isinstance(force_field_source, LigParGenForceFieldSource):

        return PropertyEstimatorException(
            directory=directory,
            message='Only LigParGen force field sources are supported by this '
            'protocol.')

    # Load in the systems coordinates / topology.
    openmm_pdb_file = app.PDBFile(self.coordinate_file_path)

    # Create an OFF topology for better insight into the layout of the
    # system topology.
    unique_molecules = [
        Molecule.from_smiles(component.smiles)
        for component in self.substance.components
    ]

    topology = Topology.from_openmm(openmm_pdb_file.topology,
                                    unique_molecules)

    # Create the template system objects for each component in the system.
    system_templates = {}

    cutoff = pint_quantity_to_openmm(force_field_source.cutoff)

    for index, component in enumerate(self.substance.components):

        # ``unique_molecules`` was built from the same component list, so
        # ``index`` addresses this component's reference molecule.
        component_smiles = unique_molecules[index].to_smiles()

        # Find the first topology molecule of this component; its atoms are
        # ordered in the same way that they are in the full system.
        reference_topology_molecule = None

        for topology_molecule in topology.topology_molecules:

            if topology_molecule.reference_molecule.to_smiles() == component_smiles:
                reference_topology_molecule = topology_molecule
                break

        if reference_topology_molecule is None:

            # The message used to be passed as the first positional
            # argument, which every other call site uses for ``directory``.
            return PropertyEstimatorException(
                directory=directory,
                message='A topology molecule could not be matched to its reference.'
            )

        # Create the force field template using the LigParGen server.
        if component.smiles != 'O' and component.smiles != '[H]O[H]':

            force_field_path = self._parameterize_smiles(
                component.smiles, force_field_source, directory)

            # Load only this molecule's atoms from the coordinate file.
            start_index = reference_topology_molecule.atom_start_topology_index
            end_index = start_index + reference_topology_molecule.n_atoms

            index_range = list(range(start_index, end_index))

            component_pdb_file = mdtraj.load_pdb(self.coordinate_file_path,
                                                 atom_indices=index_range)
            component_topology = component_pdb_file.topology.to_openmm()
            component_topology.setUnitCellDimensions(
                openmm_pdb_file.topology.getUnitCellDimensions())

            # Create the system object.
            # noinspection PyTypeChecker
            force_field_template = app.ForceField(force_field_path)

            component_system = force_field_template.createSystem(
                topology=component_topology,
                nonbondedMethod=app.PME,
                nonbondedCutoff=cutoff,
                constraints=app.HBonds,
                rigidWater=True,
                removeCMMotion=False)

        else:

            component_system = self._build_tip3p_system(
                reference_topology_molecule, cutoff,
                openmm_pdb_file.topology.getUnitCellDimensions())

        system_templates[component_smiles] = component_system

    # Create the full system object from the component templates.
    system = None

    for topology_molecule in topology.topology_molecules:

        system_template = system_templates[
            topology_molecule.reference_molecule.to_smiles()]

        if system is None:

            # If no system has been set up yet, just use the first template.
            system = copy.deepcopy(system_template)
            continue

        # Append the component template to the full system.
        self._append_system(system, system_template)

    # Apply the OPLS mixing rules.
    self._apply_opls_mixing_rules(system)

    # Serialize the system object.
    system_xml = openmm.XmlSerializer.serialize(system)

    self.system_path = os.path.join(directory, 'system.xml')

    # Written in binary mode as explicitly UTF-8 encoded bytes.
    with open(self.system_path, 'wb') as file:
        file.write(system_xml.encode('utf-8'))

    logging.info(f'System generated: {self.id}')

    return self._get_output_dictionary()
def run_md(molecule, solvent_name="chloroform", confId=0):
    """
    Solvate ``molecule`` in a cubic box of solvent and run molecular dynamics.

    The simulation settings are read from the module-level ``config``
    mapping and the parameters come from the module-level ``forcefield``
    object, whose electrostatics method is set to PME here.

    Details:

    - The box is filled with PACKMOL (through ``pack_box``) using one solute
      and ``num`` solvent molecules
    - Either ``num`` (number of solvent molecules) or ``length`` (box side
      length, in Å) is taken from ``config``; the other is derived from the
      solvent density and molecular weight
    - A Langevin integrator with a friction of 1/ps is used
    - The energy is minimized for at most 25 iterations before the
      dynamics are run for ``config["num_steps"]`` steps

    Args:
        molecule (openforcefield.topology.Molecule): the solute
        solvent_name (str): either ``chloroform`` or ``benzene``
        confId (int): index into ``molecule.conformers`` of the solute
            conformer which is written out for PACKMOL

    Returns:
        Nothing. ``trj.hdf5`` and ``data.csv`` are written to the current
        directory; the temporary ``solute.pdb`` and ``solvent.pdb`` files
        are removed again before the minimization starts.

    Raises:
        ValueError: if ``solvent_name`` is not recognised, or if ``config``
            holds neither ``num`` nor ``length``.
    """

    #### Solute topologies
    solute_off_top = molecule.to_topology()
    solute_omm_top = solute_off_top.to_openmm()
    # The mdtraj conversion result is not used afterwards; the call is kept
    # so that the function behaves exactly as before.
    mdt.Topology.from_openmm(solute_omm_top)

    #### Solvent molecule and its bulk properties
    if solvent_name == "chloroform":
        solvent_smiles, density, mw = "C(Cl)(Cl)Cl", 1.49, 119.38
    elif solvent_name == "benzene":
        solvent_smiles, density, mw = "c1ccccc1", 0.879, 78.11
    else:
        raise ValueError(f"Unknown solvent {solvent_name}!")

    solvent = Molecule.from_smiles(solvent_smiles)
    solvent.generate_conformers()

    solvent_off_top = solvent.to_topology()
    solvent_omm_top = solvent_off_top.to_openmm()
    mdt.Topology.from_openmm(solvent_omm_top)

    #### Box side length (Å) and number of solvent molecules
    # 1.6606 = 10^24 (Å**3 per mL) divided by Avogadro's number
    if "num" in config:
        n_solvent = config["num"]
        assert isinstance(n_solvent, int), "Need an integer number of solvent molecules."
        assert n_solvent > 0, "Need a positive number of solvent molecules."
        side = (1.6606 * n_solvent * mw / density)**(1 / 3)
    elif "length" in config:
        side = config["length"]
        assert isinstance(side, (int, float)), "Need a numeric side length."
        assert side > 0, "Need a positive length."
        n_solvent = int((side**3) * density / (mw * 1.6606))
    else:
        raise ValueError("Need ``length`` or ``num`` in config file!")

    logger.info(
        f"{n_solvent} solvent molecules in a cube with {side:.2f} Å sides.")

    #### Write solute and solvent to ``.pdb`` files for PACKMOL
    solute_pdb = "solute.pdb"
    solvent_pdb = "solvent.pdb"

    write_pdb = openmm.app.pdbfile.PDBFile.writeFile

    with open(solute_pdb, "w+") as handle:
        write_pdb(solute_omm_top, molecule.conformers[confId], handle)

    with open(solvent_pdb, "w+") as handle:
        write_pdb(solvent_omm_top, solvent.conformers[0], handle)

    #### Use the ``openmoltools`` wrapper for PACKMOL to fill the box
    packed = pack_box([solute_pdb, solvent_pdb], [1, n_solvent],
                      box_size=side)

    #### Convert back to ``openforcefield``
    box_topology = packed.top.to_openmm()

    # OpenMM works in nanometers, the side length so far is in Å.
    side_nm = side / 10
    box_topology.setPeriodicBoxVectors(
        ((side_nm, 0, 0), (0, side_nm, 0), (0, 0, side_nm)))

    unique_molecules = [
        Molecule.from_topology(solute_off_top),
        Molecule.from_topology(solvent_off_top),
    ]
    full_off_top = Topology.from_openmm(box_topology, unique_molecules)
    logger.info(f"BOX VECTORS: {full_off_top.box_vectors}")

    #### Set up the OpenMM system
    forcefield.get_parameter_handler('Electrostatics').method = 'PME'
    system = forcefield.create_openmm_system(full_off_top)

    step_size = config["time_step"] * unit.femtoseconds
    target_temperature = config["temperature"] * unit.kelvin
    collision_rate = 1 / unit.picosecond
    integrator = openmm.LangevinIntegrator(target_temperature,
                                           collision_rate, step_size)

    #### Set up the simulation
    simulation = openmm.app.Simulation(box_topology, system, integrator)
    logger.info("Simulation object created.")

    simulation.context.setPositions(packed.openmm_positions(0))
    logger.info("Positions loaded.")

    #### Attach the trajectory and state reporters
    trajectory_reporter = mdt.reporters.HDF5Reporter('trj.hdf5',
                                                     config["hdf5_freq"])
    state_reporter = openmm.app.StateDataReporter("data.csv",
                                                  config["data_freq"],
                                                  step=True,
                                                  potentialEnergy=True,
                                                  temperature=True,
                                                  density=True)
    simulation.reporters.append(trajectory_reporter)
    simulation.reporters.append(state_reporter)

    platform_name = simulation.context.getPlatform().getName()
    logger.info("Using Platform: " + platform_name)

    #### Clean up the PACKMOL input files
    for temporary_pdb in (solute_pdb, solvent_pdb):
        os.remove(temporary_pdb)

    logger.info("Minimizing...")
    simulation.minimizeEnergy(maxIterations=25)

    logger.info("Running...")
    wall_begin = time.time()
    cpu_begin = time.process_time()

    simulation.step(config["num_steps"])

    wall_finish = time.time()
    cpu_finish = time.process_time()

    logger.info(
        f"Elapsed time {wall_finish-wall_begin:.2f} s (CPU: {cpu_finish-cpu_begin:.2f} s)")
    logger.info("Done")
def _get_residue_names_from_role(substances, coordinate_path, role):
    """Collects the residue names of every molecule in a coordinate file
    whose component has been assigned the given role.

    Parameters
    ----------
    substances: list of Substance
        The substances whose components make up the system.
    coordinate_path: str
        The path to the PDB file which describes the topology of
        the system.
    role: Substance.ComponentRole
        The role to select components by.

    Returns
    -------
    set of str or str
        The residue names of the matching molecules, or the string
        ``'all'`` when `role` is ``Substance.ComponentRole.Undefined``.
    """

    from simtk.openmm import app
    from openforcefield.topology import Molecule, Topology

    if role == Substance.ComponentRole.Undefined:
        return 'all'

    # Build the OFF topology of the full system from one reference molecule
    # per component.
    reference_molecules = []

    for substance in substances:
        for component in substance.components:
            reference_molecules.append(Molecule.from_smiles(component.smiles))

    pdb_topology = app.PDBFile(coordinate_path).topology
    off_topology = Topology.from_openmm(pdb_topology, reference_molecules)

    # Gather the components with the requested role, and round-trip their
    # smiles through the toolkit so that they can be compared with the
    # toolkit generated smiles of the topology molecules below.
    matching_components = []

    for substance in substances:
        for component in substance.components:
            if component.role == role:
                matching_components.append(component)

    role_smiles = [
        Molecule.from_smiles(component.smiles).to_smiles()
        for component in matching_components
    ]

    openmm_atoms = list(pdb_topology.atoms())
    found_names = set()

    # Pick out the residue names of the molecules which have the
    # requested role.
    for topology_molecule in off_topology.topology_molecules:

        if topology_molecule.reference_molecule.to_smiles() in role_smiles:

            names_in_molecule = {
                openmm_atoms[atom.topology_atom_index].residue.name
                for atom in topology_molecule.atoms
            }

            # Every atom of a molecule is expected to sit in residues
            # which share a single name.
            assert len(names_in_molecule) == 1

            found_names.update(names_in_molecule)

    return found_names
def prepare(self, pbc=False, mmopts={}, **kwargs):
    """
    Prepare the calculation. Note that we don't create the
    Simulation object yet, because that may depend on MD
    integrator parameters, thermostat, barostat etc.

    This is mostly copied and modified from openmmio.py's OpenMM.prepare(),
    but we are calling ForceField() from the OpenFF toolkit and ignoring
    AMOEBA stuff.

    Parameters
    ----------
    pbc : bool
        Whether periodic boundary conditions are used; stored on
        ``self.pbc``. When true, a box is built for every frame of
        ``self.mol`` from ``self.mol.boxes``.
    mmopts : dict
        Options for setting up the OpenMM System. The mapping is copied
        into ``self.mmopts``, so neither the caller's dict nor the shared
        default is ever modified.
    **kwargs
        The keys read here are ``offxml`` (only when ``self.FF`` is not
        set), ``restrain_k``, ``freeze_atoms`` and ``nonbonded_cutoff``
        (the latter only triggers a warning).

    Raises
    ------
    RuntimeError
        If ``pbc`` is true and a frame has a box angle other than 90.0.
    """
    self.pdb = PDBFile(self.abspdb)

    # Create the OpenFF ForceField object.
    if hasattr(self, 'FF'):
        self.offxml = [self.FF.offxml]
        self.forcefield = self.FF.openff_forcefield
    else:
        self.offxml = listfiles(kwargs.get('offxml'), 'offxml', err=True)
        self.forcefield = OpenFF_ForceField(*self.offxml)

    ## Load mol2 files for smirnoff topology
    openff_mols = []
    for fnm in self.mol2:
        try:
            mol = OffMolecule.from_file(fnm)
        except Exception:
            logger.error("Error when loading %s" % fnm)
            # Re-raise the active exception with its traceback untouched.
            raise
        openff_mols.append(mol)
    self.off_topology = OffTopology.from_openmm(self.pdb.topology, unique_molecules=openff_mols)

    # used in create_simulation()
    self.mod = Modeller(self.pdb.topology, self.pdb.positions)

    ## OpenMM options for setting up the System.
    self.mmopts = dict(mmopts)

    ## Specify frozen atoms and restraint force constant
    if 'restrain_k' in kwargs:
        self.restrain_k = kwargs['restrain_k']
    if 'freeze_atoms' in kwargs:
        self.freeze_atoms = kwargs['freeze_atoms'][:]

    ## Set system options from ForceBalance force field options.
    fftmp = False
    if hasattr(self, 'FF'):
        self.mmopts['rigidWater'] = self.FF.rigid_water
        if not all(os.path.exists(f) for f in self.FF.fnms):
            # If the parameter files don't already exist, create them for the purpose of
            # preparing the engine, but then delete them afterward.
            fftmp = True
            self.FF.make(np.zeros(self.FF.np))

    # Everything from here on runs with the temporary parameter files in
    # place; the finally clause removes them even if the preparation fails
    # (e.g. on a nonorthogonal box).
    try:
        ## Set system options from periodic boundary conditions.
        self.pbc = pbc

        ## print warning for 'nonbonded_cutoff' keywords
        if 'nonbonded_cutoff' in kwargs:
            logger.warning("nonbonded_cutoff keyword ignored because it's set in the offxml file\n")

        ## Generate OpenMM-compatible positions
        # No virtual particles are added here (LPW: we don't have virtual
        # sites yet).
        self.xyz_omms = []
        for frame in range(len(self.mol)):
            position = self.mol.xyzs[frame] * angstrom
            if self.pbc:
                # Obtain the periodic box
                box = self.mol.boxes[frame]
                if box.alpha != 90.0 or box.beta != 90.0 or box.gamma != 90.0:
                    logger.error('OpenMM cannot handle nonorthogonal boxes.\n')
                    raise RuntimeError
                box_omm = np.diag([box.a, box.b, box.c]) * angstrom
            else:
                box_omm = None
            # Finally append it to list.
            self.xyz_omms.append((position, box_omm))

        ## Build a topology and atom lists.
        Top = self.pdb.topology
        Atoms = list(Top.atoms())

        self.AtomLists = defaultdict(list)
        # Particles without an element are given a mass of zero ...
        self.AtomLists['Mass'] = [a.element.mass.value_in_unit(dalton) if a.element is not None else 0 for a in Atoms]
        # ... and anything lighter than 1.0 is labelled 'D' instead of 'A'.
        self.AtomLists['ParticleType'] = ['A' if m >= 1.0 else 'D' for m in self.AtomLists['Mass']]
        self.AtomLists['ResidueNumber'] = [a.residue.index for a in Atoms]
        self.AtomMask = [a == 'A' for a in self.AtomLists['ParticleType']]
        self.realAtomIdxs = [i for i, is_atom in enumerate(self.AtomMask) if is_atom]
    finally:
        # fftmp can only be set when self.FF exists.
        if fftmp:
            for f in self.FF.fnms:
                os.unlink(f)
def minimize(dat_file, lst_angle, pdb_dir, sdf_dir, coor_dir=None, xml_dir=None):
    """Minimize every molecule at each requested torsion angle.

    The molecule names and the atoms defining the restrained torsion are
    read with ``read_data(dat_file)``. For each molecule the angles in
    ``lst_angle`` are visited in order; the minimized positions of one
    angle are the starting positions of the next, and the first angle
    starts from the positions in the PDB file.

    Parameters
    ----------
    dat_file
        Passed to ``read_data``, which yields the molecule names and, per
        molecule, the atoms handed to ``make_restrain_torsion``.
    lst_angle : iterable
        The target angles; each is converted with ``float`` before it is
        passed to ``make_restrain_torsion``.
    pdb_dir : str
        Directory holding the PDB files (named by ``pdb_format``).
    sdf_dir : str
        Directory holding the molecule files (named by ``sdf_format``).
    coor_dir : str, optional
        If given, the minimized positions of every (molecule, angle) pair
        are written to ``<coor_dir>/<name>_<angle>.xyz``.
    xml_dir : str, optional
        If given, ``write_xml`` is called for every (molecule, angle) pair
        with the path ``<xml_dir>/<name>_<angle>.xml``.

    Returns
    -------
    list
        One list per molecule holding, for each angle, the pair
        ``[sum of all but the last energy term, last energy term]`` taken
        from ``extract_energy(simulation)``.
        NOTE(review): the last term is presumably the torsion restraint,
        as that force is added to the system last - confirm against
        ``extract_energy``.

    Notes
    -----
    The index of each finished molecule is appended to ``check.log`` in the
    current directory.
    """
    # The simulation configuration
    time_step = 2 * unit.femtoseconds  # simulation timestep
    temperature = 300 * unit.kelvin  # simulation temperature
    friction = 1 / unit.picosecond  # collision rate
    minimize_tolerance = 1e-5 * unit.kilojoule / unit.mole
    minimize_iteration_step = 1000000

    forcefield = ForceField('openff-1.1.1.offxml')
    list_name, list_atoms = read_data(dat_file)

    list_energies = []
    for i, name in enumerate(list_name):
        pdbfile = PDBFile(pdb_dir + '/' + pdb_format.format(name))
        uni_mol = Molecule.from_file(sdf_dir + '/' + sdf_format.format(name))
        topo = pdbfile.topology

        list_energy = []
        previous_structure = pdbfile.getPositions()

        for angle in lst_angle:
            # A fresh system is needed for every angle, as the restraint
            # force is added to it below.
            topo_ff = Topology.from_openmm(topo, [uni_mol])
            system = forcefield.create_openmm_system(topo_ff)

            restrain_force = make_restrain_torsion(list_atoms[i],
                                                   float(angle), 1e6)
            system.addForce(restrain_force)

            integrator = openmm.LangevinIntegrator(temperature, friction,
                                                   time_step)
            simulation = openmm.app.Simulation(topo, system, integrator)

            # Continue from the structure minimized at the previous angle.
            simulation.context.setPositions(previous_structure)
            simulation.minimizeEnergy(tolerance=minimize_tolerance,
                                      maxIterations=minimize_iteration_step)

            energy_list = extract_energy(simulation)

            # Sum every term except the last one, which is reported
            # separately.
            sum_energy = 0.0
            for j in range(len(energy_list) - 1):
                sum_energy += energy_list[j]
            list_energy.append([sum_energy, energy_list[-1]])

            previous_structure = simulation.context.getState(
                getPositions=True).getPositions()

            if coor_dir is not None:
                write_xyz(
                    previous_structure, topo,
                    coor_dir + '/' + str(name) + '_' + str(angle) + '.xyz')
            if xml_dir is not None:
                write_xml(
                    topo,
                    xml_dir + '/' + str(name) + '_' + str(angle) + '.xml')

        list_energies.append(list_energy)

        # Check: record the progress; the context manager closes the file
        # even if the write fails.
        with open('check.log', 'a') as check_log:
            check_log.write(str(i) + '\n')

    return list_energies