def test_ligand_data(target, ligand_name, lig):
    m1 = Chem.MolFromSmiles(lig._data["smiles"][0])
    m1 = Chem.AddHs(m1)
    m2 = Chem.SDMolSupplier(
        os.path.join(
            targets.data_path,
            targets.get_target_dir(target),
            "02_ligands",
            ligand_name,
            "crd",
            f"{ligand_name}.sdf",
        ),
        removeHs=False,
    )[0]
    assert m1.GetNumAtoms() == m2.GetNumAtoms()
    m1.RemoveAllConformers()
    m2.RemoveAllConformers()
    assert pytest.approx(1.0, 1e-9) == DataStructs.FingerprintSimilarity(
        Chem.RDKFingerprint(m1), Chem.RDKFingerprint(m2))
    #            assert Chem.MolToMolBlock(m1) == Chem.MolToMolBlock(m2)
    res = rdFMCS.FindMCS([m1, m2])
    assert res.numAtoms == m1.GetNumAtoms()
    assert res.numBonds == m1.GetNumBonds()

    m3 = lig.get_molecule()
    m2 = Molecule.from_rdkit(m2)
    assert Molecule.are_isomorphic(m2, m3)
Beispiel #2
0
def minimise_energy_all_confs(mol, models = None, epsilon = 4, allow_undefined_stereo = True, **kwargs ):
    from simtk import unit
    from simtk.openmm import LangevinIntegrator
    from simtk.openmm.app import Simulation, HBonds, NoCutoff
    from rdkit import Chem
    from rdkit.Geometry import Point3D
    import mlddec
    import copy
    import tqdm
    mol = Chem.AddHs(mol, addCoords = True)

    if models is None:
        models  = mlddec.load_models(epsilon)
    charges = mlddec.get_charges(mol, models)

    from openforcefield.utils.toolkits import RDKitToolkitWrapper, ToolkitRegistry
    from openforcefield.topology import Molecule, Topology
    from openforcefield.typing.engines.smirnoff import ForceField
    # from openforcefield.typing.engines.smirnoff.forcefield import PME

    import parmed
    import numpy as np

    forcefield = ForceField(get_data_filename("modified_smirnoff99Frosst.offxml")) #FIXME better way of identifying file location

    tmp = copy.deepcopy(mol)
    tmp.RemoveAllConformers() #XXX workround for speed beacuse seemingly openforcefield records all conformer informations, which takes a long time. but I think this is a ill-practice

    molecule = Molecule.from_rdkit(tmp, allow_undefined_stereo = allow_undefined_stereo)
    molecule.partial_charges = unit.Quantity(np.array(charges), unit.elementary_charge)
    topology = Topology.from_molecules(molecule)
    openmm_system = forcefield.create_openmm_system(topology, charge_from_molecules= [molecule])

    structure = parmed.openmm.topsystem.load_topology(topology.to_openmm(), openmm_system)


    system = structure.createSystem(nonbondedMethod=NoCutoff, nonbondedCutoff=1*unit.nanometer, constraints=HBonds)

    integrator = LangevinIntegrator(273*unit.kelvin, 1/unit.picosecond, 0.002*unit.picoseconds)
    simulation = Simulation(structure.topology, system, integrator)

    out_mol = copy.deepcopy(mol)
    for i in tqdm.tqdm(range(out_mol.GetNumConformers())):
        conf = mol.GetConformer(i)
        structure.coordinates =  unit.Quantity(np.array([np.array(conf.GetAtomPosition(i)) for i in range(mol.GetNumAtoms())]), unit.angstroms)

        simulation.context.setPositions(structure.positions)

        simulation.minimizeEnergy()
        # simulation.step(1)

        coords = simulation.context.getState(getPositions = True).getPositions(asNumpy = True).value_in_unit(unit.angstrom)
        conf = out_mol.GetConformer(i)
        for j in range(out_mol.GetNumAtoms()):
            conf.SetAtomPosition(j, Point3D(*coords[j]))

    return out_mol
Beispiel #3
0
    def _convert_to_off(mol):
        import openforcefield

        if isinstance(mol, esp.Graph):
            return mol.mol

        elif isinstance(mol, openforcefield.topology.molecule.Molecule):
            return mol
        elif isinstance(mol, rdkit.Chem.rdchem.Mol):
            return Molecule.from_rdkit(mol)
        elif "openeye" in str(
                type(mol)):  # because we don't want to depend on OE
            return Molecule.from_openeye(mol)
Beispiel #4
0
    def get_energies(self, resolution=30, get_thetas=False):
        """
        It returns the energies of this energetic profile calculator.

        Parameters
        ----------
        resolution : float
            The resolution, in degrees, that is applied when rotating
            the dihedral rotatable bond. Default is 30 degrees
        get_thetas : bool
            Whether to return thetas (dihedral angles) or not. Default
            is False

        Returns
        -------
        dihedral_energies : a simtk.unit.Quantity object
            The array of energies represented with a simtk's Quantity
            object
        thetas : list[float]
            The array of thetas, only if requested in the corresponding
            function parameter
        """
        mol_with_conformers = \
            self.dihedral_benchmark.generate_dihedral_conformers(
                resolution)

        from openforcefield.topology import Molecule

        mol = Molecule.from_rdkit(mol_with_conformers)

        dihedral_energies = list()

        for i, conformer in enumerate(mol.conformers):
            energy = self._calc_energy(conformer)
            dihedral_energies.append(energy)

        if get_thetas:
            from rdkit.Chem import rdMolTransforms

            thetas = list()

            for conf in mol_with_conformers.GetConformers():
                thetas.append(
                    rdMolTransforms.GetDihedralDeg(
                        conf, *self.dihedral_benchmark.atom_indexes))

            return dihedral_energies, thetas

        return dihedral_energies
Beispiel #5
0
    def _rdkit_parameteriser(cls, mol, **kwargs):
        from rdkit import Chem
        from openforcefield.utils.toolkits import RDKitToolkitWrapper, ToolkitRegistry
        """
        Creates a parameterised system from rdkit molecule

        Parameters
        ----------
        mol : rdkit.Chem.Mol
        """

        try:
            forcefield = ForceField('test_forcefields/smirnoff99Frosst.offxml')
            molecule = Molecule.from_rdkit(
                mol, allow_undefined_stereo=cls.allow_undefined_stereo)
            if hasattr(cls, "_ddec_charger"):
                molecule.partial_charges = unit.Quantity(
                    np.array(cls._ddec_charger(mol, cls.rf)),
                    unit.elementary_charge)
            else:
                from openforcefield.utils.toolkits import AmberToolsToolkitWrapper
                molecule.compute_partial_charges_am1bcc(
                    toolkit_registry=AmberToolsToolkitWrapper())

            topology = Topology.from_molecules(molecule)
            openmm_system = forcefield.create_openmm_system(
                topology, charge_from_molecules=[molecule])

            ligand_pmd = parmed.openmm.topsystem.load_topology(
                topology.to_openmm(), openmm_system, molecule._conformers[0])
        except Exception as e:
            raise ValueError("Parameterisation Failed : {}".format(e))  #TODO

        ligand_pmd.title = cls.smiles

        for i in ligand_pmd.residues:
            i.name = 'LIG'

        tmp_dir = tempfile.mkdtemp()
        # We need all molecules as both pdb files (as packmol input)
        # and mdtraj.Trajectory for restoring bonds later.
        pdb_filename = tempfile.mktemp(suffix=".pdb", dir=tmp_dir)
        Chem.MolToPDBFile(mol, pdb_filename)
        cls.pdb_filename = pdb_filename
        cls.ligand_pmd = ligand_pmd
Beispiel #6
0
def zinc(first=-1, *args, **kwargs):
    import tarfile
    from os.path import exists
    from openforcefield.topology import Molecule
    from rdkit import Chem

    fname = 'parm_at_Frosst.tgz'
    url = 'http://www.ccl.net/cca/data/parm_at_Frosst/parm_at_Frosst.tgz'

    if not exists(fname):
        import urllib.request
        urllib.request.urlretrieve(url, fname)

    archive = tarfile.open(fname)
    zinc_file = archive.extractfile('parm_at_Frosst/zinc.sdf')
    _mols = Chem.ForwardSDMolSupplier(zinc_file, removeHs=False)

    count = 0
    gs = []

    for mol in _mols:
        try:
            gs.append(
                esp.Graph(
                    Molecule.from_rdkit(mol, allow_undefined_stereo=True)
                )
            )

            count += 1

        except:
            pass

        if first != -1 and count >= first:
            break

    return esp.data.dataset.GraphDataset(gs, *args, **kwargs)
Beispiel #7
0
    def serialise_system(self):
        """Create the OpenMM system; parametrise using frost; serialise the system."""

        # Create an openFF molecule from the rdkit molecule
        off_molecule = Molecule.from_rdkit(self.molecule.rdkit_mol,
                                           allow_undefined_stereo=True)

        # Make the OpenMM system
        off_topology = off_molecule.to_topology()

        forcefield = ForceField("openff_unconstrained-1.0.0.offxml")

        try:
            # Parametrise the topology and create an OpenMM System.
            system = forcefield.create_openmm_system(off_topology)
        except (
                UnassignedValenceParameterException,
                UnassignedBondParameterException,
                UnassignedProperTorsionParameterException,
                UnassignedAngleParameterException,
                UnassignedMoleculeChargeException,
                TypeError,
        ):
            # If this does not work then we have a molecule that is not in SMIRNOFF so we must add generics
            # and remove the charge handler to get some basic parameters for the moleucle
            new_bond = BondHandler.BondType(
                smirks="[*:1]~[*:2]",
                length="0 * angstrom",
                k="0.0 * angstrom**-2 * mole**-1 * kilocalorie",
            )
            new_angle = AngleHandler.AngleType(
                smirks="[*:1]~[*:2]~[*:3]",
                angle="0.0 * degree",
                k="0.0 * mole**-1 * radian**-2 * kilocalorie",
            )
            new_torsion = ProperTorsionHandler.ProperTorsionType(
                smirks="[*:1]~[*:2]~[*:3]~[*:4]",
                periodicity1="1",
                phase1="0.0 * degree",
                k1="0.0 * mole**-1 * kilocalorie",
                periodicity2="2",
                phase2="180.0 * degree",
                k2="0.0 * mole**-1 * kilocalorie",
                periodicity3="3",
                phase3="0.0 * degree",
                k3="0.0 * mole**-1 * kilocalorie",
                periodicity4="4",
                phase4="180.0 * degree",
                k4="0.0 * mole**-1 * kilocalorie",
                idivf1="1.0",
                idivf2="1.0",
                idivf3="1.0",
                idivf4="1.0",
            )
            new_vdw = vdWHandler.vdWType(
                smirks="[*:1]",
                epsilon=0 * unit.kilocalories_per_mole,
                sigma=0 * unit.angstroms,
            )
            new_generics = {
                "Bonds": new_bond,
                "Angles": new_angle,
                "ProperTorsions": new_torsion,
                "vdW": new_vdw,
            }
            for key, val in new_generics.items():
                forcefield.get_parameter_handler(key).parameters.insert(0, val)
            # This has to be removed as sqm will fail with unknown elements
            del forcefield._parameter_handlers["ToolkitAM1BCC"]
            del forcefield._parameter_handlers["Electrostatics"]
            # Parametrize the topology and create an OpenMM System.
            system = forcefield.create_openmm_system(off_topology)
            # This will tag the molecule so run.py knows that generics have been used.
            self.fftype = "generics"
        # Serialise the OpenMM system into the xml file
        with open("serialised.xml", "w+") as out:
            out.write(XmlSerializer.serializeSystem(system))
        rdkit_mol = Chem.MolFromSmiles(smiles)
        rdkit_mol_h = Chem.AddHs(rdkit_mol)
        AllChem.EmbedMolecule(
            rdkit_mol_h
        )  #generate a 3D conformation to avoid the errors before
        Chem.rdmolops.AssignAtomChiralTagsFromStructure(
            rdkit_mol_h
        )  #very important, else you'll repeat the errors from before

        AllChem.ComputeGasteigerCharges(
            rdkit_mol_h
        )  #thats not at all mandatory nore recommended. It's just to go faster here. Don't do that in your code !!!

        try:
            #here is where the magic happens
            mol = Molecule.from_rdkit(
                rdkit_mol_h)  #create a openff molecule from the rdkit one
            top = mol.to_topology()  #extract the topology

            ligand_system = force_field.create_openmm_system(
                top, charge_from_molecules=[mol]
            )  #create our openmm system (that's the simplest version here)

            #classical setup of an openmm simulation
            #here we'll do just a bit of minimization
            integrator = openmm.LangevinIntegrator(300 * unit.kelvin,
                                                   91 / unit.picosecond,
                                                   0.002 * unit.picoseconds)
            simulation = openmm.app.Simulation(top, ligand_system, integrator)
            simulation.context.setPositions(mol.conformers[0])
            print("     starting minimization")
            state = simulation.context.getState(getEnergy=True, getForces=True)
Beispiel #9
0
    def serialise_system(self):
        """Create the OpenMM system; parametrise using frost; serialise the system."""

        # Create an openFF molecule from the rdkit molecule
        off_molecule = Molecule.from_rdkit(self.molecule.rdkit_mol,
                                           allow_undefined_stereo=True)

        # Make the OpenMM system
        off_topology = off_molecule.to_topology()

        # Load the smirnoff99Frosst force field.
        forcefield = ForceField('test_forcefields/smirnoff99Frosst.offxml')

        try:
            # Parametrize the topology and create an OpenMM System.
            system = forcefield.create_openmm_system(off_topology)
        except (UnassignedValenceParameterException, TypeError,
                UnassignedValenceParameterException):
            # If this does not work then we have a moleucle that is not in SMIRNOFF so we must add generics
            # and remove the charge handler to get some basic parameters for the moleucle
            new_bond = BondHandler.BondType(
                smirks='[*:1]~[*:2]',
                length="0 * angstrom",
                k="0.0 * angstrom**-2 * mole**-1 * kilocalorie")
            new_angle = AngleHandler.AngleType(
                smirks='[*:1]~[*:2]~[*:3]',
                angle="0.0 * degree",
                k="0.0 * mole**-1 * radian**-2 * kilocalorie")
            new_torsion = ProperTorsionHandler.ProperTorsionType(
                smirks='[*:1]~[*:2]~[*:3]~[*:4]',
                periodicity1="1",
                phase1="0.0 * degree",
                k1="0.0 * mole**-1 * kilocalorie",
                periodicity2="2",
                phase2="180.0 * degree",
                k2="0.0 * mole**-1 * kilocalorie",
                periodicity3="3",
                phase3="0.0 * degree",
                k3="0.0 * mole**-1 * kilocalorie",
                periodicity4="4",
                phase4="180.0 * degree",
                k4="0.0 * mole**-1 * kilocalorie",
                idivf1="1.0",
                idivf2="1.0",
                idivf3="1.0",
                idivf4="1.0")

            new_vdw = vdWHandler.vdWType(smirks='[*:1]',
                                         epsilon=0 *
                                         unit.kilocalories_per_mole,
                                         sigma=0 * unit.angstroms)
            new_generics = {
                'Bonds': new_bond,
                'Angles': new_angle,
                'ProperTorsions': new_torsion,
                'vdW': new_vdw
            }
            for key, val in new_generics.items():
                forcefield.get_parameter_handler(key).parameters.insert(0, val)

            # This has to be removed as sqm will fail with unknown elements
            del forcefield._parameter_handlers['ToolkitAM1BCC']

            # Parametrize the topology and create an OpenMM System.
            system = forcefield.create_openmm_system(off_topology)
            # This will tag the molecule so run.py knows that generics have been used.
            self.fftype = 'generics'

        # Serialise the OpenMM system into the xml file
        with open('serialised.xml', 'w+') as out:
            out.write(XmlSerializer.serializeSystem(system))