Beispiel #1
0
    def _generate_charges(self, molecule):
        """Generate conformers and AM1-BCC partial charges for a molecule
        using the toolkit(s) selected by ``self.charge_backend``.

        Both operations are invoked on ``molecule`` itself; nothing is
        returned.

        Parameters
        ----------
        molecule: openforcefield.topology.Molecule
            The molecule to assign charges to.

        Raises
        ------
        ValueError
            If ``self.charge_backend`` is neither the OpenEye nor the
            AmberTools backend.
        """
        backend = self.charge_backend

        if backend == BuildTLeapSystem.ChargeBackend.OpenEye:

            from openforcefield.utils.toolkits import OpenEyeToolkitWrapper

            # A single wrapper instance is passed where a registry is expected.
            registry = OpenEyeToolkitWrapper()

        elif backend == BuildTLeapSystem.ChargeBackend.AmberTools:

            from openforcefield.utils.toolkits import (
                RDKitToolkitWrapper,
                AmberToolsToolkitWrapper,
                ToolkitRegistry,
            )

            # RDKit is listed ahead of AmberTools in the precedence order.
            precedence = [RDKitToolkitWrapper, AmberToolsToolkitWrapper]
            registry = ToolkitRegistry(toolkit_precedence=precedence)

        else:
            raise ValueError("Invalid toolkit specification.")

        # Conformers are generated before the charge calculation is requested.
        molecule.generate_conformers(toolkit_registry=registry)
        molecule.compute_partial_charges_am1bcc(toolkit_registry=registry)
    def test_compute_partial_charges_net_charge(self):
        """Test AM1-BCC partial charges from an AmberTools/RDKit registry on a molecule with a net +1 charge"""
        toolkit_registry = ToolkitRegistry(
            toolkit_precedence=[AmberToolsToolkitWrapper, RDKitToolkitWrapper])
        smiles = '[H]C([H])([H])[N+]([H])([H])[H]'
        molecule = Molecule.from_smiles(smiles,
                                        toolkit_registry=toolkit_registry)
        molecule.generate_conformers(toolkit_registry=toolkit_registry)

        # The generic entry point is expected to raise NotImplementedError.
        # TODO: pass charge_model='notARealChargeModel' once the charge model
        #       API has been decided.
        with pytest.raises(NotImplementedError):
            molecule.compute_partial_charges(toolkit_registry=toolkit_registry)

        # TODO: Figure out why ['cm1', 'cm2'] fail
        # TODO: once compute_partial_charges accepts a charge model, pass
        #       charge_model here and assert that the charges sum to +1
        #       (within 0.01 e). Nothing placed after the raising call inside
        #       the `with` block would ever execute, so that check is not
        #       written out yet.
        for charge_model in ['gas', 'mul', 'bcc']:
            with pytest.raises(NotImplementedError):
                molecule.compute_partial_charges(
                    toolkit_registry=toolkit_registry)

        # For now, I'm just testing AM1-BCC (will test more when the SMIRNOFF spec for other charges is finalized)
        molecule.compute_partial_charges_am1bcc(
            toolkit_registry=toolkit_registry)
        charge_sum = 0 * unit.elementary_charge
        for pc in molecule._partial_charges:
            charge_sum += pc
        # The total charge must match the +1 formal charge of the cation.
        assert 0.999 * unit.elementary_charge < charge_sum < 1.001 * unit.elementary_charge
    def test_compute_partial_charges(self):
        """Test AM1-BCC partial charges from an AmberTools/RDKit registry on a neutral molecule"""
        toolkit_registry = ToolkitRegistry(
            toolkit_precedence=[AmberToolsToolkitWrapper, RDKitToolkitWrapper])

        smiles = '[H]C([H])([H])C([H])([H])[H]'
        molecule = Molecule.from_smiles(smiles,
                                        toolkit_registry=toolkit_registry)
        molecule.generate_conformers(toolkit_registry=toolkit_registry)

        # TODO: Implementation of these tests is pending a decision on the API for our charge model
        # Until then the generic entry point is expected to raise
        # NotImplementedError.
        with pytest.raises(NotImplementedError):
            molecule.compute_partial_charges(toolkit_registry=toolkit_registry)

        # ['cm1', 'cm2']
        # TODO: once compute_partial_charges accepts a charge model, pass
        #       charge_model here and assert that the charges sum to zero
        #       (within 0.01 e). Nothing placed after the raising call inside
        #       the `with` block would ever execute, so that check is not
        #       written out yet.
        for charge_model in ['gas', 'mul', 'bcc']:
            with pytest.raises(NotImplementedError):
                molecule.compute_partial_charges(
                    toolkit_registry=toolkit_registry)

        # For now, just test AM1-BCC while the SMIRNOFF spec for other charge models gets worked out
        molecule.compute_partial_charges_am1bcc(
            toolkit_registry=toolkit_registry)
        charge_sum = 0 * unit.elementary_charge
        for pc in molecule._partial_charges:
            charge_sum += pc
        # Bound the total on both sides: an upper bound alone would let an
        # arbitrarily negative net charge pass for this neutral molecule.
        tolerance = 0.002 * unit.elementary_charge
        assert -tolerance < charge_sum < tolerance
    def test_register_rdkit(self):
        """Check that a registry built from RDKitToolkitWrapper alone registers, resolves and dispatches"""
        # Registration: only the wrapper named in the precedence list is present
        registry = ToolkitRegistry(toolkit_precedence=[RDKitToolkitWrapper],
                                   register_imported_toolkit_wrappers=False)
        registered_types = {
            type(wrapper) for wrapper in registry.registered_toolkits
        }
        assert registered_types == {RDKitToolkitWrapper}

        # resolve() hands back the bound method of the first registered toolkit
        resolved = registry.resolve('to_smiles')
        assert resolved == registry.registered_toolkits[0].to_smiles

        # call() dispatches by method name; the SMILES must survive a round trip
        input_smiles = '[H][C]([H])([H])[C]([H])([H])[H]'
        parsed = registry.call('from_smiles', input_smiles)
        round_tripped = registry.call('to_smiles', parsed)
        assert input_smiles == round_tripped
Beispiel #5
0
def make_registry(toolkit: str) -> ToolkitRegistry:
    """Build a single-toolkit ``ToolkitRegistry`` from a toolkit name.

    The name is matched case-insensitively against "openeye" and "rdkit".
    Any other value raises ``UnsupportedToolkitError``.
    """
    requested = toolkit.lower()

    # The wrapper classes are imported lazily, only for the requested toolkit.
    if requested == "openeye":
        from openforcefield.utils.toolkits import (
            OpenEyeToolkitWrapper as wrapper_class,
        )
    elif requested == "rdkit":
        from openforcefield.utils.toolkits import (
            RDKitToolkitWrapper as wrapper_class,
        )
    else:
        from openff.cli.utils.exceptions import UnsupportedToolkitError

        raise UnsupportedToolkitError(toolkit=toolkit)

    registry = ToolkitRegistry(toolkit_precedence=[wrapper_class])

    # Checks later assume that this is length 1. This should be changed if
    # multiple toolkits (i.e. RDKit and AmberTools) are needed at once
    assert len(registry.registered_toolkit_versions) == 1
    return registry
    def test_register_ambertools(self):
        """Check registration and method resolution for a registry holding
        AmberToolsToolkitWrapper followed by RDKitToolkitWrapper
        """
        # Registration: both wrappers from the precedence list are present
        registry = ToolkitRegistry(
            toolkit_precedence=[AmberToolsToolkitWrapper, RDKitToolkitWrapper],
            register_imported_toolkit_wrappers=False)
        registered_types = {
            type(wrapper) for wrapper in registry.registered_toolkits
        }
        assert registered_types == {
            AmberToolsToolkitWrapper, RDKitToolkitWrapper
        }

        # resolve() must pick the charge method of the first registered toolkit
        registry.resolve('compute_partial_charges')
        resolved = registry.resolve('compute_partial_charges')
        assert resolved == registry.registered_toolkits[
            0].compute_partial_charges

        # call() dispatches by name; RDKit is registered again beforehand
        registry.register_toolkit(RDKitToolkitWrapper)
        ethane_smiles = '[H]C([H])([H])C([H])([H])[H]'
        registry.call('from_smiles', ethane_smiles)
def get_conformer_energies(
    molecule: str,
    registry: ToolkitRegistry,
    forcefield: str,
    constrained: bool = False,
) -> List[Molecule]:
    """Minimize every conformer in a molecule file and record its energies.

    Consecutive records in the file that compare equal are merged into one
    molecule carrying all of their conformers. Each conformer is then
    evaluated, minimized, and replaced in place by its minimized positions.

    Parameters
    ----------
    molecule
        Path of the molecule file. The text after the last "." is used as
        the file format.
    registry
        Toolkit registry used to read the file.
    forcefield
        Force field identifier passed to ``_get_forcefield``; also stored in
        each molecule's properties.
    constrained
        Passed through to ``_get_forcefield``.

    Returns
    -------
    The loaded (and merged) molecules. Each has three per-conformer lists in
    ``properties``, keyed "original conformer energies (kcal/mol)",
    "minimized conformer energies (kcal/mol)" and
    "RMSD of minimized conformers (angstrom)".

    Raises
    ------
    MoleculeParsingError
        If the toolkit returns no molecules for the file.
    """
    _enforce_dependency_version("openforcefield", "0.7.0")

    file_format = molecule.split(".")[-1]

    loaded_molecules = registry.call(
        "from_file",
        molecule,
        file_format=file_format,
    )

    # A lone molecule may come back unwrapped; normalise to a list.
    if loaded_molecules is not None and not isinstance(loaded_molecules, list):
        loaded_molecules = [loaded_molecules]

    # An unparsable file can yield an empty result instead of an exception.
    # Fail the same way generate_conformers does rather than with an
    # IndexError on the first-element access below.
    if not loaded_molecules:
        from openff.cli.utils.exceptions import MoleculeParsingError

        raise MoleculeParsingError(toolkit_registry=registry,
                                   filename=molecule)

    # Fold runs of identical molecules into a single multi-conformer molecule.
    mols = [loaded_molecules[0]]
    for mol in loaded_molecules[1:]:
        if mol == mols[-1]:
            for conformer in mol.conformers:
                mols[-1].add_conformer(conformer)
        else:
            # Append the parsed molecule object, not the file-path argument.
            mols.append(mol)

    n_molecules = len(mols)
    n_conformers = sum(mol.n_conformers for mol in mols)
    print(
        f"{n_molecules} unique molecule(s) loaded, with {n_conformers} total conformers"
    )

    ff = _get_forcefield(forcefield, constrained)

    conformer_property_keys = [
        "original conformer energies (kcal/mol)",
        "minimized conformer energies (kcal/mol)",
        "RMSD of minimized conformers (angstrom)",
    ]

    # This is duplicated from generate_conformers
    minimized_mols = []
    for mol in mols:
        # Only a molecule that arrived with charges is offered as a charge
        # source. The check is made before the charges are overwritten below.
        mol_with_charge = [mol] if mol.partial_charges is not None else []
        simulation, partial_charges = _build_simulation(
            molecule=mol,
            forcefield=ff,
            mols_with_charge=mol_with_charge,
        )
        mol._partial_charges = partial_charges

        mol.properties["minimized against: "] = forcefield

        # One slot per conformer, filled in as each conformer is processed.
        for prop in conformer_property_keys:
            mol.properties[prop] = mol.n_conformers * [None]

        for i, conformer in enumerate(mol.conformers):
            simulation.context.setPositions(conformer)
            pre_energy, pre_positions = _get_conformer_data(simulation)
            mol.properties["original conformer energies (kcal/mol)"][
                i] = pre_energy

            simulation = _minimize_conformer(simulation, conformer)
            min_energy, min_positions = _get_conformer_data(simulation)
            mol.properties["minimized conformer energies (kcal/mol)"][
                i] = min_energy
            # The stored conformer is replaced by its minimized positions.
            mol.conformers[i] = min_positions
            rms = _get_rms_two_conformers(mol, pre_positions, min_positions)
            mol.properties["RMSD of minimized conformers (angstrom)"][i] = rms
        minimized_mols.append(mol)

    return minimized_mols
Beispiel #8
0
    def _topology_molecule_to_mol2(topology_molecule, file_name,
                                   charge_backend):
        """Write an `openforcefield.topology.TopologyMolecule` to a mol2 file,
        generating a conformer and AM1BCC charges along the way.

        A deep copy of the reference molecule receives the conformer and the
        charges. A fresh molecule is then assembled in topology atom order,
        and the coordinates and charges are mapped onto that ordering.

        .. todo :: This function uses non-public methods from the Open Force Field toolkit
                   and should be refactored when public methods become available

        Parameters
        ----------
        topology_molecule: openforcefield.topology.TopologyMolecule
            The `TopologyMolecule` to write out as a mol2 file. The atom ordering in
            this mol2 will be consistent with the topology ordering.
        file_name: str
            The filename to write to.
        charge_backend: BuildTLeapSystem.ChargeBackend
            The backend to use for conformer generation and partial charge
            calculation.

        Raises
        ------
        ValueError
            If ``charge_backend`` is neither the OpenEye nor the AmberTools
            backend.
        """
        from openforcefield.topology import Molecule
        from simtk import unit as simtk_unit

        # Work on a copy so conformer generation / charging leaves the original untouched
        charged_reference = copy.deepcopy(
            topology_molecule.reference_molecule)

        # Pick the toolkit(s) that will do the conformer and charge work
        if charge_backend == BuildTLeapSystem.ChargeBackend.OpenEye:

            from openforcefield.utils.toolkits import OpenEyeToolkitWrapper

            toolkits = OpenEyeToolkitWrapper()

        elif charge_backend == BuildTLeapSystem.ChargeBackend.AmberTools:

            from openforcefield.utils.toolkits import RDKitToolkitWrapper, AmberToolsToolkitWrapper, ToolkitRegistry

            toolkits = ToolkitRegistry(toolkit_precedence=[
                RDKitToolkitWrapper, AmberToolsToolkitWrapper
            ])

        else:
            raise ValueError('Invalid toolkit specification.')

        charged_reference.generate_conformers(toolkit_registry=toolkits)
        charged_reference.compute_partial_charges_am1bcc(
            toolkit_registry=toolkits)

        # The parent topology is needed to look up topology atom indices below
        parent_topology = topology_molecule.topology

        # Assemble a new openforcefield.topology.Molecule in topology order
        mol2_molecule = Molecule()
        mol2_molecule.name = charged_reference.name

        for top_atom in topology_molecule.atoms:

            # Force the topology to cache the topology molecule start indices
            parent_topology.atom(top_atom.topology_atom_index)

            atom = top_atom.atom
            mol2_molecule.add_atom(atom.atomic_number, atom.formal_charge,
                                   atom.is_aromatic, atom.stereochemistry,
                                   atom.name)

        for top_bond in topology_molecule.bonds:

            # Temporary workaround: the new molecule indexes its atoms from 0,
            # whereas the bond's atoms report their index within the whole
            # Topology. Subtracting the molecule's start index gives the
            # "local" index, i.e. the index the atom would have if this were
            # the only molecule in the Topology.
            start_index = topology_molecule._atom_start_topology_index

            # `.atoms` is a generator; materialise it so it can be indexed
            bonded_atoms = list(top_bond.atoms)

            bond = top_bond.bond
            mol2_molecule.add_bond(
                bonded_atoms[0].topology_atom_index - start_index,
                bonded_atoms[1].topology_atom_index - start_index,
                bond.bond_order,
                bond.is_aromatic,
                bond.stereochemistry,
            )

        # Carry the conformer and charges over, translating from reference
        # atom ordering to topology atom ordering
        n_atoms = charged_reference.n_atoms
        coordinates = np.zeros((n_atoms, 3))
        charges = np.zeros(n_atoms)

        for ref_index in range(n_atoms):
            # No offset is needed: _ref_to_top_index is already "locally"
            # indexed for this topology molecule
            top_index = topology_molecule._ref_to_top_index[ref_index]

            position = charged_reference.conformers[0][ref_index]
            coordinates[top_index, :] = position.value_in_unit(
                simtk_unit.angstrom)

            charge = charged_reference.partial_charges[ref_index]
            charges[top_index] = charge.value_in_unit(
                simtk_unit.elementary_charge)

        # Reattach the units that were stripped for the numpy arrays
        mol2_molecule.add_conformer(coordinates * simtk_unit.angstrom)
        mol2_molecule.partial_charges = charges * simtk_unit.elementary_charge

        # Write the molecule
        mol2_molecule.to_file(file_name, file_format='mol2')
def generate_conformers(
    molecule: str,
    registry: ToolkitRegistry,
    forcefield: str,
    constrained: bool = False,
    prefix: Optional[str] = None,
) -> List[Molecule]:
    """Generate and minimize conformers for the molecule(s) in a file.

    The file is read with ``registry``. If reading fails because of undefined
    stereochemistry, it is re-read with undefined stereo allowed and the
    stereoisomers of each molecule are enumerated. Conformers are generated
    for every molecule, each conformer is minimized, and one output molecule
    is produced per minimized conformer.

    Parameters
    ----------
    molecule
        Path of the molecule file. The text after the last "." is used as
        the file format.
    registry
        Toolkit registry used to read the file and to generate conformers.
    forcefield
        Force field identifier passed to ``_get_forcefield`` and to
        ``_add_metadata_to_mol``.
    constrained
        Passed through to ``_get_forcefield``.
    prefix
        If given, it replaces the name of every loaded molecule. Otherwise
        molecules without a name are called "molecule". When the file holds
        more than one record, the record index is appended to the name.

    Returns
    -------
    The molecules built from each minimized conformer, in the order produced
    by ``_sort_mols``.

    Raises
    ------
    MoleculeParsingError
        If the toolkit returns no molecules for the file.
    """
    # The version string must not end in a stray "."; a trailing dot is not a
    # well-formed release number.
    _enforce_dependency_version("openforcefield", "0.7.1")

    ff = _get_forcefield(forcefield, constrained)

    file_format = molecule.split(".")[-1]

    # TODO: This may not preserve order of loading molecules in
    ambiguous_stereochemistry = False
    try:
        raw_mols = registry.call(
            "from_file",
            molecule,
            file_format=file_format,
        )
    except UndefinedStereochemistryError:
        ambiguous_stereochemistry = True
        raw_mols = registry.call(
            "from_file",
            molecule,
            file_format=file_format,
            allow_undefined_stereo=True,
        )

    # A lone molecule may come back unwrapped; normalise to a list, as
    # get_conformer_energies does.
    if raw_mols is not None and not isinstance(raw_mols, list):
        raw_mols = [raw_mols]

    # When failing to parse molecules (i.e. attempting to read MOL2 with
    # RDKit, which is not supported) the toolkit can return an empty
    # list instead of raising a specific exception
    if not raw_mols:
        from openff.cli.utils.exceptions import MoleculeParsingError

        raise MoleculeParsingError(toolkit_registry=registry,
                                   filename=molecule)

    # Give every record a name; multi-record files get the index appended.
    mols = []
    for i, mol in enumerate(raw_mols):
        if prefix is not None:
            mol.name = prefix
        elif not mol.name:
            mol.name = "molecule"
        if len(raw_mols) > 1:
            mol.name += str(i)
        mols.append(mol)

    mols = _collapse_conformers(mols)

    # TODO: How to handle names of different stereoisomers? Just act like they're different conformers?
    if ambiguous_stereochemistry:
        mols_with_unpacked_stereoisomers = []
        for mol in mols:
            # TODO: This is a brute-force approach, it would be better to check stereo
            #  without needing to call enumerate_stereoisomers
            stereoisomers = mol.enumerate_stereoisomers()
            if stereoisomers:
                for i, iso in enumerate(stereoisomers):
                    iso.name = mol.name + "_stereoisomer" + str(i)
                    mols_with_unpacked_stereoisomers.append(iso)
            else:
                # Nothing was enumerated; keep the molecule as loaded.
                mols_with_unpacked_stereoisomers.append(mol)
        mols = mols_with_unpacked_stereoisomers

    for mol in mols:
        # Remember the first input conformer (if any) so it can be added back
        # after conformer generation.
        existing_conf = None
        if mol.conformers is not None:
            existing_conf = deepcopy(mol.conformers[0])
        mol.generate_conformers(
            toolkit_registry=registry,
            n_conformers=100,
            rms_cutoff=0.25 * unit.angstrom,
        )
        if existing_conf is not None:
            mol.add_conformer(existing_conf)

    # TODO: What happens if some molecules in a multi-molecule file have charges, others don't?
    mols_out = []
    for mol in mols:
        # Only a molecule that arrived with charges is offered as a charge
        # source. The check is made before the charges are overwritten below.
        mol_with_charge = [mol] if mol.partial_charges is not None else []
        simulation, partial_charges = _build_simulation(
            molecule=mol,
            forcefield=ff,
            mols_with_charge=mol_with_charge,
        )
        mol._partial_charges = partial_charges

        # NOTE(review): `mol` is rebound inside this loop, so from the second
        # conformer on, the previously reconstructed molecule (not the
        # original) is passed to _reconstruct_mol_from_conformer. The set of
        # conformers iterated is fixed before the first rebind. Confirm this
        # is intended before renaming the variable.
        for conformer in mol.conformers:
            simulation = _minimize_conformer(simulation, conformer)
            energy, positions = _get_conformer_data(simulation)
            mol = _reconstruct_mol_from_conformer(mol, positions)
            _add_metadata_to_mol(mol, energy, registry, forcefield)
            mols_out.append(mol)

    mols_out = _sort_mols(mols_out)

    return mols_out