def _filter_function(self, result, record, molecule) -> bool:

        has_stereochemistry = True

        try:

            for toolkit_name in self.toolkits:

                if toolkit_name == "openeye":
                    toolkit_registry = OpenEyeToolkitWrapper()
                elif toolkit_name == "rdkit":
                    toolkit_registry = RDKitToolkitWrapper()
                else:
                    raise NotImplementedError()

                for conformer in molecule.conformers:

                    stereo_molecule = copy.deepcopy(molecule)
                    stereo_molecule._conformers = [conformer]

                    with NamedTemporaryFile(suffix=".sdf") as file:

                        stereo_molecule.to_file(file.name, "SDF")
                        stereo_molecule.from_file(
                            file.name, toolkit_registry=toolkit_registry)

        except UndefinedStereochemistryError:
            has_stereochemistry = False

        return has_stereochemistry
Example #2
0
    def test_chemical_environments_matches_RDK(self):
        """Test Topology.chemical_environment_matches with the RDKit wrapper.

        A topology is built from the cyclohexane/ethanol packmol box and three
        SMARTS queries are run against it: a heavy-atom C-C-O pattern, the
        same pattern with explicit hydrogens, and a pattern expected to have
        no match.
        """
        from simtk.openmm import app

        rdkit_wrapper = RDKitToolkitWrapper()
        box_path = get_data_file_path(
            "systems/packmol_boxes/cyclohexane_ethanol_0.4_0.6.pdb")
        box_pdb = app.PDBFile(box_path)

        # Ethanol and cyclohexane serve as the unique reference molecules.
        reference_molecules = [
            Molecule.from_smiles(smiles) for smiles in ("CCO", "C1CCCCC1")
        ]
        topology = Topology.from_openmm(
            box_pdb.topology, unique_molecules=reference_molecules)

        # Both C-C-O queries are expected to report this triple first.
        first_hit_indices = (1728, 1729, 1730)

        # Count CCO matches
        heavy_atom_hits = topology.chemical_environment_matches(
            "[C:1]-[C:2]-[O:3]", toolkit_registry=rdkit_wrapper)
        assert len(heavy_atom_hits) == 143
        assert heavy_atom_hits[0].topology_atom_indices == first_hit_indices

        # 143 * 12 (there are 12 possible hydrogen mappings)
        explicit_h_hits = topology.chemical_environment_matches(
            "[H][C:1]([H])([H])-[C:2]([H])([H])-[O:3][H]",
            toolkit_registry=rdkit_wrapper,
        )
        assert len(explicit_h_hits) == 1716
        assert explicit_h_hits[0].topology_atom_indices == first_hit_indices

        # Search for a substructure that isn't there
        absent_hits = topology.chemical_environment_matches(
            "[C][C:1]-[C:2]-[O:3]", toolkit_registry=rdkit_wrapper)
        assert len(absent_hits) == 0
Example #3
0
def test_filter_cli(openff_methane: Molecule, runner):
    """Run ``filter_cli`` with ``--strip-ions`` on a two-molecule SDF file.

    The test expects the command to exit cleanly, to create ``filtered.sdf``
    and to leave exactly one molecule in it, whose RDKit SMILES is the
    chlorinated phenolate anion without its ``[Na+]`` counter-ion.
    """
    input_smiles = (
        "C1(=C(C(=C(C(=C1Cl)Cl)Cl)Cl)Cl)[O-].[Na+]",
        "CCC(C)(C)C(F)(F)CCCCC(F)(F)C(C)(C)CC",
    )

    # Create an SDF file to filter.
    with stream_to_file("molecules.sdf") as writer:
        for smiles in input_smiles:
            writer(Molecule.from_smiles(smiles))

    result = runner.invoke(
        filter_cli,
        ["--input", "molecules.sdf", "--output", "filtered.sdf", "--strip-ions"],
    )

    # Surface the underlying exception rather than a bare exit-code failure.
    if result.exit_code != 0:
        raise result.exception

    assert os.path.isfile("filtered.sdf")

    survivors = list(stream_from_file("filtered.sdf"))
    assert len(survivors) == 1

    survivor_smiles = survivors[0].to_smiles(
        toolkit_registry=RDKitToolkitWrapper())
    assert survivor_smiles == "[O-][c]1[c]([Cl])[c]([Cl])[c]([Cl])[c]([Cl])[c]1[Cl]"
Example #4
0
def test_enumerate_cli(openff_methane: Molecule, runner):
    """Run ``enumerate_cli`` with ``--tautomers`` on a duplicated molecule.

    The same butenol molecule is written twice to ``molecules.sdf``. The test
    expects a clean exit, a ``tautomers.sdf`` output holding four molecules,
    and a fixed set of RDKit SMILES (without explicit hydrogens) for them.
    """
    buteneol = Molecule.from_smiles(r"C/C=C(/C)\O")

    # Create an SDF file to enumerate; the molecule is written twice.
    with stream_to_file("molecules.sdf") as writer:
        for _ in range(2):
            writer(buteneol)

    cli_arguments = [
        "--input",
        "molecules.sdf",
        "--output",
        "tautomers.sdf",
        "--tautomers",
    ]
    result = runner.invoke(enumerate_cli, cli_arguments)

    # Surface the underlying exception rather than a bare exit-code failure.
    if result.exit_code != 0:
        raise result.exception

    assert os.path.isfile("tautomers.sdf")

    tautomers = list(stream_from_file("tautomers.sdf"))
    assert len(tautomers) == 4

    observed_smiles = {
        tautomer.to_smiles(
            explicit_hydrogens=False, toolkit_registry=RDKitToolkitWrapper()
        )
        for tautomer in tautomers
    }
    expected_smiles = {"C/C=C(/C)O", "C=C(O)CC", "CCC(C)=O", "CC=C(C)O"}
    assert observed_smiles == expected_smiles
    assert len(ring_systems) == 1
    assert len(ring_systems[1][0]) == 5


# Each case pairs a ``toolkit_registry`` argument with a list of toolkit class
# names passed in as ``expected_provenance``.
@pytest.mark.parametrize(
    "toolkit_registry, expected_provenance",
    [
        (
            # No registry supplied: paired with the class name of every
            # toolkit registered in GLOBAL_TOOLKIT_REGISTRY.
            None,
            [
                toolkit.__class__.__name__
                for toolkit in GLOBAL_TOOLKIT_REGISTRY.registered_toolkits
            ],
        ),
        # A bare wrapper instance.
        (RDKitToolkitWrapper(), ["RDKitToolkitWrapper"]),
        # A registry built from the RDKit wrapper class only.
        (ToolkitRegistry([RDKitToolkitWrapper]), ["RDKitToolkitWrapper"]),
    ],
)
def test_fragmenter_provenance(toolkit_registry, expected_provenance):
    """Call ``Fragmenter.fragment`` on a stub subclass with each registry.

    A minimal ``Fragmenter`` subclass whose ``_fragment`` returns a fixed,
    empty ``FragmentationResult`` is run on a helium molecule with the
    parametrized ``toolkit_registry``.

    NOTE(review): the visible body never reads ``expected_provenance`` and
    makes no assertions on ``result`` - the checks may have been cut off;
    confirm against the full test.
    """
    class DummyFragmenter(Fragmenter):
        def _fragment(self, molecule: Molecule,
                      target_bond_smarts: str) -> FragmentationResult:
            """Return a fixed result with no fragments and empty provenance."""

            # Both arguments are ignored; the result is always the same.
            return FragmentationResult(parent_smiles="[He:1]",
                                       fragments=[],
                                       provenance={})

    # Goes through the public ``fragment`` entry point rather than calling
    # ``_fragment`` directly; presumably that is where provenance is filled
    # in - TODO confirm against the Fragmenter implementation.
    result = DummyFragmenter().fragment(Molecule.from_smiles("[He]"),
                                        ["[*:1]~[*:2]"], toolkit_registry)
Example #6
0
def compute_conformer_energies_from_file(filename):
    """Minimize every conformer found in ``filename`` and report the energies.

    The file is loaded through the RDKit toolkit wrapper, consecutive equal
    molecules are merged into single multi-conformer molecules, and each
    molecule is parametrized with ``openff_unconstrained-1.1.0.offxml``.
    For every conformer the potential energy before and after an OpenMM
    minimization is printed together with the RMS value reported by
    ``rdMolAlign.AlignMolConformers`` for the initial/minimized pair.

    Side effects: prints progress to stdout, writes one
    ``<name>_conf<i>_minimized.sdf`` file per conformer and one
    ``<name>.csv`` table per molecule, and assigns a Hill-formula name to
    any molecule whose name is empty.

    Args:
        filename: Path passed to ``Molecule.from_file``.
    """
    # All conformers of the same molecule are loaded as separate Molecule
    # objects. With OFF Toolkit versions before 0.7.0, loading SDFs through
    # RDKit and OpenEye may behave differently in some cases, so loading is
    # forced through RDKit.
    rdkit_wrapper = RDKitToolkitWrapper()
    entries = Molecule.from_file(filename, toolkit_registry=rdkit_wrapper)
    # The merging below needs a list, so wrap a lone molecule.
    if type(entries) is not list:
        entries = [entries]

    # Collate all conformers of the same molecule.
    # NOTE: This isn't necessary for molecules that already carry several
    # conformers; it is only needed because the SDF reader does not collapse
    # conformers automatically.
    unique = [entries[0]]
    for entry in entries[1:]:
        if not entry == unique[-1]:
            unique.append(entry)
            continue
        for coordinates in entry.conformers:
            unique[-1].add_conformer(coordinates)

    n_molecules = len(unique)
    n_conformers = sum(mol.n_conformers for mol in unique)
    print(
        f'{n_molecules} unique molecule(s) loaded, with {n_conformers} total conformers'
    )

    # Load the openff-1.1.0 force field appropriate for vacuum calculations
    # (without constraints).
    from openff.toolkit.typing.engines.smirnoff import ForceField
    forcefield = ForceField('openff_unconstrained-1.1.0.offxml')

    # Loop over molecules and minimize each conformer.
    for molecule in unique:
        # Unnamed molecules get their Hill formula as a name, because the
        # name is used to build the output file names below.
        if molecule.name == '':
            molecule.name = Topology._networkx_to_hill_formula(
                molecule.to_networkx())
            print('%s : %d conformers' %
                  (molecule.name, molecule.n_conformers))

        # Scratch copy whose conformers are replaced for each minimization.
        scratch = Molecule(molecule)
        # OpenFF Topology used to parametrize the system.
        off_topology = molecule.to_topology()
        print(
            f"Parametrizing {molecule.name} (may take a moment to calculate charges)"
        )
        system = forcefield.create_openmm_system(off_topology)

        # OpenMM machinery shared by all conformers of this molecule.
        integrator = openmm.VerletIntegrator(1 * unit.femtoseconds)
        platform = openmm.Platform.getPlatformByName('Reference')
        simulation = openmm.app.Simulation(off_topology.to_openmm(), system,
                                           integrator, platform)

        # Print text header
        print(
            'Conformer         Initial PE         Minimized PE       RMS between initial and minimized conformer'
        )
        rows = [[
            'Conformer', 'Initial PE (kcal/mol)', 'Minimized PE (kcal/mol)',
            'RMS between initial and minimized conformer (Angstrom)'
        ]]

        for conformer_index, start_coords in enumerate(molecule.conformers):
            # Energy before minimization.
            simulation.context.setPositions(start_coords)
            orig_potential = simulation.context.getState(
                getEnergy=True).getPotentialEnergy()

            # Minimize and read back energy and positions.
            simulation.minimizeEnergy()
            minimized_state = simulation.context.getState(getEnergy=True,
                                                          getPositions=True)
            min_potential = minimized_state.getPotentialEnergy()
            final_coords = np.array(
                [[point.x, point.y, point.z]
                 for point in minimized_state.getPositions()]) * unit.nanometer

            # RMS between the initial and minimized conformer: load both
            # into the scratch molecule and align them with RDKit.
            scratch._conformers = None
            scratch.add_conformer(start_coords)
            scratch.add_conformer(final_coords)
            rms_values = []
            rdMolAlign.AlignMolConformers(scratch.to_rdkit(),
                                          RMSlist=rms_values)
            minimization_rms = rms_values[0]

            # Save the minimized conformer to file.
            scratch._conformers = None
            scratch.add_conformer(final_coords)
            scratch.to_file(
                f'{molecule.name}_conf{conformer_index+1}_minimized.sdf',
                file_format='sdf')

            print(
                '%5d / %5d : %8.3f kcal/mol %8.3f kcal/mol  %8.3f Angstroms' %
                (conformer_index + 1, molecule.n_conformers,
                 orig_potential / unit.kilocalories_per_mole,
                 min_potential / unit.kilocalories_per_mole, minimization_rms))
            rows.append([
                str(conformer_index + 1),
                f'{orig_potential/unit.kilocalories_per_mole:.3f}',
                f'{min_potential/unit.kilocalories_per_mole:.3f}',
                f'{minimization_rms:.3f}'
            ])

        # Write the results out to CSV.
        with open(f'{molecule.name}.csv', 'w') as csv_file:
            csv_file.writelines(','.join(row) + '\n' for row in rows)

        # Clean up OpenMM Simulation before moving on to the next molecule.
        del simulation, integrator