def _topology_molecule_to_mol2(topology_molecule, file_name, charge_backend):
    """Converts an `openforcefield.topology.TopologyMolecule` into a mol2 file,
    generating a conformer and AM1BCC charges in the process.

    .. todo :: This function uses non-public methods from the Open Force Field
               toolkit and should be refactored when public methods become
               available

    Parameters
    ----------
    topology_molecule: openforcefield.topology.TopologyMolecule
        The `TopologyMolecule` to write out as a mol2 file. The atom ordering
        in this mol2 will be consistent with the topology ordering.
    file_name: str
        The filename to write to.
    charge_backend: BuildTLeapSystem.ChargeBackend
        The backend to use for conformer generation and partial charge
        calculation.

    Raises
    ------
    ValueError
        If `charge_backend` is neither `ChargeBackend.OpenEye` nor
        `ChargeBackend.AmberTools`.
    """
    from openforcefield.topology import Molecule
    from simtk import unit as simtk_unit

    # Make a copy of the reference molecule so we can run conf gen / charge
    # calc without modifying the original
    reference_molecule = copy.deepcopy(topology_molecule.reference_molecule)

    # Pick the toolkit (or registry of toolkits) which backs the requested
    # backend. The conformer / charge calls themselves are identical for both
    # backends, so they are issued once below.
    if charge_backend == BuildTLeapSystem.ChargeBackend.OpenEye:

        from openforcefield.utils.toolkits import OpenEyeToolkitWrapper

        toolkit_wrapper = OpenEyeToolkitWrapper()

    elif charge_backend == BuildTLeapSystem.ChargeBackend.AmberTools:

        from openforcefield.utils.toolkits import (
            AmberToolsToolkitWrapper,
            RDKitToolkitWrapper,
            ToolkitRegistry,
        )

        toolkit_wrapper = ToolkitRegistry(
            toolkit_precedence=[RDKitToolkitWrapper, AmberToolsToolkitWrapper]
        )

    else:
        raise ValueError(
            f'Invalid toolkit specification: {charge_backend!r}.'
        )

    reference_molecule.generate_conformers(toolkit_registry=toolkit_wrapper)
    reference_molecule.compute_partial_charges_am1bcc(
        toolkit_registry=toolkit_wrapper
    )

    # Get access to the parent topology, so we can look up the topology atom
    # indices later.
    topology = topology_molecule.topology

    # Make and populate a new openforcefield.topology.Molecule
    new_molecule = Molecule()
    new_molecule.name = reference_molecule.name

    # Add atoms to the new molecule in the correct order
    for topology_atom in topology_molecule.atoms:

        # Force the topology to cache the topology molecule start indices
        topology.atom(topology_atom.topology_atom_index)

        new_molecule.add_atom(
            topology_atom.atom.atomic_number,
            topology_atom.atom.formal_charge,
            topology_atom.atom.is_aromatic,
            topology_atom.atom.stereochemistry,
            topology_atom.atom.name,
        )

    # This is a temporary workaround to figure out what the "local" atom index
    # of the bonded atoms is. In other words it is the offset we need to apply
    # to get the index if this were the only molecule in the whole Topology.
    # We need to apply this offset because `new_molecule` begins its atom
    # indexing at 0, not the real topology atom index (which we do know).
    # The value is the same for every bond, so it is read once, after the atom
    # loop above has forced the topology to populate its cache.
    index_offset = topology_molecule._atom_start_topology_index

    # Add bonds to the new molecule
    for topology_bond in topology_molecule.bonds:

        # `.atoms` is a generator; a bond always joins exactly two atoms.
        first_atom, second_atom = topology_bond.atoms

        new_molecule.add_bond(
            first_atom.topology_atom_index - index_offset,
            second_atom.topology_atom_index - index_offset,
            topology_bond.bond.bond_order,
            topology_bond.bond.is_aromatic,
            topology_bond.bond.stereochemistry,
        )

    # Transfer over existing conformers and partial charges, accounting for
    # the reference/topology indexing differences
    n_atoms = reference_molecule.n_atoms

    new_conformers = np.zeros((n_atoms, 3))
    new_charges = np.zeros(n_atoms)

    # Strip the units once up front rather than once per atom.
    reference_coordinates = reference_molecule.conformers[0].value_in_unit(
        simtk_unit.angstrom
    )
    reference_charges = reference_molecule.partial_charges.value_in_unit(
        simtk_unit.elementary_charge
    )

    # Then iterate over the reference atoms, mapping their indices to the
    # topology molecule's indexing system
    for reference_atom_index in range(n_atoms):

        # We don't need to apply the offset here, since _ref_to_top_index is
        # already "locally" indexed for this topology molecule
        local_top_index = topology_molecule._ref_to_top_index[
            reference_atom_index
        ]

        new_conformers[local_top_index, :] = reference_coordinates[
            reference_atom_index
        ]
        new_charges[local_top_index] = reference_charges[reference_atom_index]

    # Reattach the units
    new_molecule.add_conformer(new_conformers * simtk_unit.angstrom)
    new_molecule.partial_charges = new_charges * simtk_unit.elementary_charge

    # Write the molecule
    new_molecule.to_file(file_name, file_format='mol2')
def compute_conformer_energies_from_file(filename):
    """Minimize every conformer stored in a molecule file and report energies.

    The file is loaded through RDKit, consecutive records describing the same
    molecule are merged into a single multi-conformer molecule, and each
    molecule is parameterized with the unconstrained openff-1.1.0 force
    field. Every conformer is then energy-minimized with OpenMM on the
    `Reference` platform.

    For each molecule the following is produced, using paths relative to the
    current working directory:

    * ``<name>_conf<i>_minimized.sdf`` - one file per minimized conformer.
    * ``<name>.csv`` - a table of the initial potential energy, the minimized
      potential energy (both in kcal/mol) and the RMS between the initial and
      minimized conformer.

    The same information is also printed to standard output.

    Parameters
    ----------
    filename: str
        Path of the molecule file (e.g. an SDF) holding one or more
        conformers of one or more molecules.
    """
    # Load in the molecule and its conformers.
    # Note that all conformers of the same molecule are loaded as separate
    # Molecule objects.
    # If using a OFF Toolkit version before 0.7.0, loading SDFs through RDKit
    # and OpenEye may provide different behavior in some cases. So, here we
    # force loading through RDKit to ensure the correct behavior
    rdktkw = RDKitToolkitWrapper()
    loaded_molecules = Molecule.from_file(filename, toolkit_registry=rdktkw)

    # `Molecule.from_file` hands back a bare `Molecule` (not a list) when the
    # file only contains a single record. The collation logic below indexes
    # and slices the result, so normalise it to a list first.
    if not isinstance(loaded_molecules, list):
        loaded_molecules = [loaded_molecules]

    # Collate all conformers of the same molecule
    # NOTE: This isn't necessary if you have already loaded or created
    # multi-conformer molecules; it is just needed because our SDF reader
    # does not automatically collapse conformers.
    molecules = [loaded_molecules[0]]

    for molecule in loaded_molecules[1:]:
        if molecule == molecules[-1]:
            for conformer in molecule.conformers:
                molecules[-1].add_conformer(conformer)
        else:
            molecules.append(molecule)

    n_molecules = len(molecules)
    n_conformers = sum(mol.n_conformers for mol in molecules)
    print(
        f'{n_molecules} unique molecule(s) loaded, with {n_conformers} total conformers'
    )

    # Load the openff-1.1.0 force field appropriate for vacuum calculations
    # (without constraints)
    from openforcefield.typing.engines.smirnoff import ForceField
    forcefield = ForceField('openff_unconstrained-1.1.0.offxml')

    # Loop over molecules and minimize each conformer
    for molecule in molecules:
        # If the molecule doesn't have a name, set mol.name to be the hill
        # formula
        if molecule.name == '':
            molecule.name = Topology._networkx_to_hill_formula(
                molecule.to_networkx())
        print('%s : %d conformers' % (molecule.name, molecule.n_conformers))

        # Make a temporary copy of the molecule that we can update for each
        # minimization
        mol_copy = Molecule(molecule)

        # Make an OpenFF Topology so we can parameterize the system
        off_top = molecule.to_topology()
        print(
            f"Parametrizing {molecule.name} (may take a moment to calculate charges)"
        )
        system = forcefield.create_openmm_system(off_top)

        # Use OpenMM to compute initial and minimized energy for all
        # conformers
        integrator = openmm.VerletIntegrator(1 * unit.femtoseconds)
        platform = openmm.Platform.getPlatformByName('Reference')
        omm_top = off_top.to_openmm()
        simulation = openmm.app.Simulation(omm_top, system, integrator,
                                           platform)

        # Print text header
        print(
            'Conformer Initial PE Minimized PE RMS between initial and minimized conformer'
        )
        output = [[
            'Conformer', 'Initial PE (kcal/mol)', 'Minimized PE (kcal/mol)',
            'RMS between initial and minimized conformer (Angstrom)'
        ]]

        for conformer_index, conformer in enumerate(molecule.conformers):
            simulation.context.setPositions(conformer)
            orig_potential = simulation.context.getState(
                getEnergy=True).getPotentialEnergy()
            simulation.minimizeEnergy()
            min_state = simulation.context.getState(getEnergy=True,
                                                    getPositions=True)
            min_potential = min_state.getPotentialEnergy()

            # Calculate the RMSD between the initial and minimized conformer.
            # The copy is reset so that it holds exactly two conformers: the
            # starting geometry (index 0) and the minimized one (index 1).
            min_coords = min_state.getPositions()
            min_coords = np.array([[atom.x, atom.y, atom.z]
                                   for atom in min_coords]) * unit.nanometer
            mol_copy._conformers = None
            mol_copy.add_conformer(conformer)
            mol_copy.add_conformer(min_coords)
            rdmol = mol_copy.to_rdkit()
            rmslist = []
            rdMolAlign.AlignMolConformers(rdmol, RMSlist=rmslist)
            minimization_rms = rmslist[0]

            # Save the minimized conformer to file
            mol_copy._conformers = None
            mol_copy.add_conformer(min_coords)
            mol_copy.to_file(
                f'{molecule.name}_conf{conformer_index+1}_minimized.sdf',
                file_format='sdf')

            # Strip the units once; both the console line and the CSV row
            # report the energies in kcal/mol.
            orig_kcal = orig_potential / unit.kilocalories_per_mole
            min_kcal = min_potential / unit.kilocalories_per_mole

            print(
                '%5d / %5d : %8.3f kcal/mol %8.3f kcal/mol %8.3f Angstroms'
                % (conformer_index + 1, molecule.n_conformers, orig_kcal,
                   min_kcal, minimization_rms))
            output.append([
                str(conformer_index + 1),
                f'{orig_kcal:.3f}',
                f'{min_kcal:.3f}',
                f'{minimization_rms:.3f}'
            ])

        # Write the results out to CSV
        with open(f'{molecule.name}.csv', 'w') as of:
            of.writelines(','.join(line) + '\n' for line in output)

        # Clean up OpenMM Simulation
        del simulation, integrator