def _filter_function(self, result, record, molecule) -> bool: has_stereochemistry = True try: for toolkit_name in self.toolkits: if toolkit_name == "openeye": toolkit_registry = OpenEyeToolkitWrapper() elif toolkit_name == "rdkit": toolkit_registry = RDKitToolkitWrapper() else: raise NotImplementedError() for conformer in molecule.conformers: stereo_molecule = copy.deepcopy(molecule) stereo_molecule._conformers = [conformer] with NamedTemporaryFile(suffix=".sdf") as file: stereo_molecule.to_file(file.name, "SDF") stereo_molecule.from_file( file.name, toolkit_registry=toolkit_registry) except UndefinedStereochemistryError: has_stereochemistry = False return has_stereochemistry
def test_chemical_environments_matches_RDK(self):
    """Check ``Topology.chemical_environment_matches`` using the RDKit wrapper.

    A cyclohexane/ethanol box is loaded from PDB and queried with three
    SMARTS patterns: mapped C-C-O, the same pattern with explicit hydrogens,
    and a pattern that is expected to match nothing.
    """
    from simtk.openmm import app

    toolkit_wrapper = RDKitToolkitWrapper()
    pdb_path = get_data_file_path(
        "systems/packmol_boxes/cyclohexane_ethanol_0.4_0.6.pdb")
    pdbfile = app.PDBFile(pdb_path)

    # The unique molecules are built from SMILES here; an earlier variant
    # loaded them from 'molecules/ethanol.mol2' and 'molecules/cyclohexane.mol2'.
    molecules = [
        Molecule.from_smiles(smiles) for smiles in ("CCO", "C1CCCCC1")
    ]
    topology = Topology.from_openmm(pdbfile.topology,
                                    unique_molecules=molecules)

    def find(smarts):
        # Every query in this test goes through the same RDKit wrapper.
        return topology.chemical_environment_matches(
            smarts, toolkit_registry=toolkit_wrapper)

    first_ethanol_atoms = (1728, 1729, 1730)

    # Count CCO matches
    matches = find("[C:1]-[C:2]-[O:3]")
    assert len(matches) == 143
    assert matches[0].topology_atom_indices == first_ethanol_atoms

    # 143 * 12 (there are 12 possible hydrogen mappings)
    matches = find("[H][C:1]([H])([H])-[C:2]([H])([H])-[O:3][H]")
    assert len(matches) == 1716
    assert matches[0].topology_atom_indices == first_ethanol_atoms

    # Search for a substructure that isn't there
    matches = find("[C][C:1]-[C:2]-[O:3]")
    assert len(matches) == 0
def test_filter_cli(openff_methane: Molecule, runner):
    """Run ``filter_cli`` with ``--strip-ions`` on a two-molecule SDF file.

    Expects the command to succeed, to create ``filtered.sdf``, and for that
    file to hold exactly one molecule with the expected RDKit SMILES.
    """
    input_smiles = (
        "C1(=C(C(=C(C(=C1Cl)Cl)Cl)Cl)Cl)[O-].[Na+]",
        "CCC(C)(C)C(F)(F)CCCCC(F)(F)C(C)(C)CC",
    )

    # Create an SDF file to filter.
    with stream_to_file("molecules.sdf") as writer:
        for smiles in input_smiles:
            writer(Molecule.from_smiles(smiles))

    result = runner.invoke(
        filter_cli,
        ["--input", "molecules.sdf", "--output", "filtered.sdf", "--strip-ions"],
    )

    # Surface the underlying exception instead of a bare exit-code failure.
    if result.exit_code != 0:
        raise result.exception

    assert os.path.isfile("filtered.sdf")

    filtered_molecules = list(stream_from_file("filtered.sdf"))
    assert len(filtered_molecules) == 1

    survivor = filtered_molecules[0]
    survivor_smiles = survivor.to_smiles(toolkit_registry=RDKitToolkitWrapper())
    assert survivor_smiles == "[O-][c]1[c]([Cl])[c]([Cl])[c]([Cl])[c]([Cl])[c]1[Cl]"
def test_enumerate_cli(openff_methane: Molecule, runner):
    """Run ``enumerate_cli`` with ``--tautomers`` on an SDF holding buteneol twice.

    Expects the command to succeed, to create ``tautomers.sdf`` with four
    molecules, and for their hydrogen-suppressed RDKit SMILES to form the
    expected set.
    """
    # Create an SDF file to enumerate.
    buteneol = Molecule.from_smiles(r"C/C=C(/C)\O")

    with stream_to_file("molecules.sdf") as writer:
        for _ in range(2):
            writer(buteneol)

    result = runner.invoke(
        enumerate_cli,
        ["--input", "molecules.sdf", "--output", "tautomers.sdf", "--tautomers"],
    )

    # Surface the underlying exception instead of a bare exit-code failure.
    if result.exit_code != 0:
        raise result.exception

    assert os.path.isfile("tautomers.sdf")

    tautomers = list(stream_from_file("tautomers.sdf"))
    assert len(tautomers) == 4

    observed_smiles = set()
    for tautomer in tautomers:
        observed_smiles.add(
            tautomer.to_smiles(
                explicit_hydrogens=False,
                toolkit_registry=RDKitToolkitWrapper(),
            )
        )

    assert observed_smiles == {"C/C=C(/C)O", "C=C(O)CC", "CCC(C)=O", "CC=C(C)O"}
assert len(ring_systems) == 1
assert len(ring_systems[1][0]) == 5


@pytest.mark.parametrize(
    "toolkit_registry, expected_provenance",
    [
        (
            None,
            [
                toolkit.__class__.__name__
                for toolkit in GLOBAL_TOOLKIT_REGISTRY.registered_toolkits
            ],
        ),
        (RDKitToolkitWrapper(), ["RDKitToolkitWrapper"]),
        (ToolkitRegistry([RDKitToolkitWrapper]), ["RDKitToolkitWrapper"]),
    ],
)
def test_fragmenter_provenance(toolkit_registry, expected_provenance):
    """Call ``Fragmenter.fragment`` with different ``toolkit_registry`` arguments.

    Each parametrized case pairs a registry argument (``None``, an
    ``RDKitToolkitWrapper`` instance, or a ``ToolkitRegistry`` built from
    ``RDKitToolkitWrapper``) with a list of toolkit class names. For ``None``
    the names come from ``GLOBAL_TOOLKIT_REGISTRY.registered_toolkits``.

    NOTE(review): ``result`` and ``expected_provenance`` are not used in the
    code visible here -- presumably assertions comparing them follow; confirm.
    """

    # Minimal subclass whose ``_fragment`` returns a fixed result with an
    # empty provenance dict.
    class DummyFragmenter(Fragmenter):
        def _fragment(self, molecule: Molecule, target_bond_smarts: str) -> FragmentationResult:
            return FragmentationResult(parent_smiles="[He:1]", fragments=[], provenance={})

    result = DummyFragmenter().fragment(Molecule.from_smiles("[He]"), ["[*:1]~[*:2]"], toolkit_registry)
def compute_conformer_energies_from_file(filename):
    """Minimize every conformer stored in ``filename`` and report the energies.

    The file is read through the RDKit toolkit wrapper, consecutive identical
    molecules are merged into multi-conformer molecules, and each molecule is
    parameterized with the ``openff_unconstrained-1.1.0.offxml`` force field.
    For every conformer the potential energy is evaluated with OpenMM before
    and after minimization, and the RMS between the initial and minimized
    coordinates is computed with RDKit.

    Side effects:
        * Prints a progress line and a per-conformer table to stdout.
        * Writes ``{molecule.name}_conf{i}_minimized.sdf`` for each conformer.
        * Writes ``{molecule.name}.csv`` with one row per conformer.

    Args:
        filename: Path handed to ``Molecule.from_file``.

    Returns:
        None.
    """
    # Load in the molecule and its conformers.
    # Note that all conformers of the same molecule are loaded as separate Molecule objects
    # If using an OFF Toolkit version before 0.7.0, loading SDFs through RDKit and OpenEye may provide
    # different behavior in some cases. So, here we force loading through RDKit to ensure the correct behavior
    rdktkw = RDKitToolkitWrapper()
    loaded_molecules = Molecule.from_file(filename, toolkit_registry=rdktkw)

    # The logic below only works for lists of molecules, so if a
    # single molecule was loaded, cast it to list
    if not isinstance(loaded_molecules, list):
        loaded_molecules = [loaded_molecules]

    # Collate all conformers of the same molecule
    # NOTE: This isn't necessary if you have already loaded or created multi-conformer molecules;
    # it is just needed because our SDF reader does not automatically collapse conformers.
    # Starting from an empty list (rather than indexing element 0) keeps this
    # safe when the loader returns no molecules at all.
    molecules = []
    for molecule in loaded_molecules:
        if molecules and molecule == molecules[-1]:
            for conformer in molecule.conformers:
                molecules[-1].add_conformer(conformer)
        else:
            molecules.append(molecule)

    n_molecules = len(molecules)
    n_conformers = sum(mol.n_conformers for mol in molecules)
    print(
        f'{n_molecules} unique molecule(s) loaded, with {n_conformers} total conformers'
    )

    # Load the openff-1.1.0 force field appropriate for vacuum calculations (without constraints)
    from openff.toolkit.typing.engines.smirnoff import ForceField
    forcefield = ForceField('openff_unconstrained-1.1.0.offxml')

    # Loop over molecules and minimize each conformer
    for molecule in molecules:
        # If the molecule doesn't have a name, set mol.name to be the hill formula
        if molecule.name == '':
            molecule.name = Topology._networkx_to_hill_formula(
                molecule.to_networkx())
        print(f'{molecule.name} : {molecule.n_conformers:d} conformers')

        # Make a temporary copy of the molecule that we can update for each minimization
        mol_copy = Molecule(molecule)

        # Make an OpenFF Topology so we can parameterize the system
        off_top = molecule.to_topology()
        print(
            f"Parametrizing {molecule.name} (may take a moment to calculate charges)"
        )
        system = forcefield.create_openmm_system(off_top)

        # Use OpenMM to compute initial and minimized energy for all conformers
        integrator = openmm.VerletIntegrator(1 * unit.femtoseconds)
        platform = openmm.Platform.getPlatformByName('Reference')
        omm_top = off_top.to_openmm()
        simulation = openmm.app.Simulation(omm_top, system, integrator,
                                           platform)

        # Print text header
        print(
            'Conformer Initial PE Minimized PE RMS between initial and minimized conformer'
        )
        output = [[
            'Conformer', 'Initial PE (kcal/mol)', 'Minimized PE (kcal/mol)',
            'RMS between initial and minimized conformer (Angstrom)'
        ]]

        for conformer_index, conformer in enumerate(molecule.conformers):
            simulation.context.setPositions(conformer)
            orig_potential = simulation.context.getState(
                getEnergy=True).getPotentialEnergy()
            simulation.minimizeEnergy()
            min_state = simulation.context.getState(getEnergy=True,
                                                    getPositions=True)
            min_potential = min_state.getPotentialEnergy()

            # Calculate the RMSD between the initial and minimized conformer
            min_coords = min_state.getPositions()
            min_coords = np.array([[atom.x, atom.y, atom.z]
                                   for atom in min_coords]) * unit.nanometer
            # Reset the scratch copy so it holds exactly the initial and the
            # minimized conformer, in that order, for the alignment below.
            mol_copy._conformers = None
            mol_copy.add_conformer(conformer)
            mol_copy.add_conformer(min_coords)
            rdmol = mol_copy.to_rdkit()
            rmslist = []
            rdMolAlign.AlignMolConformers(rdmol, RMSlist=rmslist)
            minimization_rms = rmslist[0]

            # Save the minimized conformer to file
            mol_copy._conformers = None
            mol_copy.add_conformer(min_coords)
            mol_copy.to_file(
                f'{molecule.name}_conf{conformer_index+1}_minimized.sdf',
                file_format='sdf')

            # Convert once and reuse for both the console line and the CSV row.
            orig_kcal = orig_potential / unit.kilocalories_per_mole
            min_kcal = min_potential / unit.kilocalories_per_mole

            print(
                f'{conformer_index + 1:5d} / {molecule.n_conformers:5d} : '
                f'{orig_kcal:8.3f} kcal/mol {min_kcal:8.3f} kcal/mol '
                f'{minimization_rms:8.3f} Angstroms'
            )
            output.append([
                str(conformer_index + 1),
                f'{orig_kcal:.3f}',
                f'{min_kcal:.3f}',
                f'{minimization_rms:.3f}'
            ])

        # Write the results out to CSV
        with open(f'{molecule.name}.csv', 'w') as of:
            for line in output:
                of.write(','.join(line) + '\n')

        # Clean up OpenMM Simulation
        del simulation, integrator