def CisCheck(ifs):
    """
    Report cis amide bonds for every molecule read from a stream.

    For each standard protein residue whose omega torsion lies strictly
    between -pi/2 and +pi/2, one line with the residue name, chain ID,
    residue number and the torsion in degrees is printed. Such a residue is
    not reported when the residue following it in the hierarchy view is a
    proline.

    Parameters
    ----------
    ifs:
        Input molecule stream handed to ``oechem.OEReadMolecule``.

    Notes
    -----
    The count printed after each molecule is cumulative over all molecules
    read so far; it is not reset between molecules.
    """
    # Residues reporting this value are ignored.
    # NOTE(review): presumably the "torsion not available" sentinel of
    # OEGetTorsion - confirm against the OEChem documentation.
    no_torsion = -100.0
    half_pi = math.pi / 2.0

    nrmol = 0
    nrcis = 0
    mol = oechem.OEGraphMol()
    while oechem.OEReadMolecule(ifs, mol):
        nrmol += 1
        print("===========================================================")
        print("Molecule: %s   Title: %s" % (nrmol, mol.GetTitle()))
        if not oechem.OEHasResidues(mol):
            oechem.OEPerceiveResidues(mol, oechem.OEPreserveResInfo_All)
        hv = oechem.OEHierView(mol)
        # An explicit iterator (instead of a for loop) is needed because the
        # following residue is inspected while handling the current one.
        resiter = hv.GetResidues()
        while resiter.IsValid():
            res = resiter.Target()
            # Advance now so that the iterator points at the next residue.
            resiter.Next()
            if not oechem.OEIsStandardProteinResidue(res):
                continue
            torsion = oechem.OEGetTorsion(res, oechem.OEProtTorType_Omega)
            if torsion == no_torsion:
                continue
            if not -half_pi < torsion < half_pi:
                continue  # trans (or outside the cis window)
            if resiter.IsValid():
                nextres = resiter.Target()
                oenextres = nextres.GetOEResidue()
                if oechem.OEGetResidueIndex(
                        oenextres) == oechem.OEResidueIndex_PRO:
                    # cis bonds preceding a proline are not reported
                    continue
            nrcis += 1
            oeres = res.GetOEResidue()
            print("%s %s %2d omega torsion = %.2f degree" %
                  (oeres.GetName(), oeres.GetChainID(),
                   oeres.GetResidueNumber(),
                   torsion * oechem.cvar.Rad2Deg))
        print(" %d cis amide bond(s) identified\n" % nrcis)
Exemple #2
0
def order_check(mol, fname):
    """
    TO REMOVE
    This function is used to debug

    It writes the chain / residue / atom hierarchy of a molecule to a file:
    one line per chain ID, one tab-indented line per residue (name and
    number) and one double-tab-indented line per atom (name and index).

    Parameters:
    -----------
    mol:
        The molecule passed to ``oechem.OEHierView``
    fname:
        Path of the file the hierarchy is written to; it is opened through
        ``logging.FileHandler``

    Return:
    -------
    None
    """
    import logging

    logger = logging.getLogger('Testing')
    hdlr = logging.FileHandler(fname)
    hdlr.setFormatter(logging.Formatter('%(message)s'))
    logger.addHandler(hdlr)
    logger.setLevel(logging.INFO)

    try:
        # Alternative view with explicit assumptions, kept for reference:
        # hv = oechem.OEHierView(mol, oechem.OEAssumption_BondedResidue + oechem.OEAssumption_ResPerceived)
        hv = oechem.OEHierView(mol)

        for chain in hv.GetChains():
            logger.info('{}'.format(chain.GetChainID()))
            for frag in chain.GetFragments():
                for hres in frag.GetResidues():
                    oe_res = hres.GetOEResidue()
                    logger.info('\t{} {}'.format(oe_res.GetName(), oe_res.GetResidueNumber()))
                    for oe_at in hres.GetAtoms():
                        logger.info('\t\t{} {}'.format(oe_at.GetName(), oe_at.GetIdx()))
    finally:
        # The 'Testing' logger is shared process-wide: detach and close the
        # handler so repeated calls neither leak file handles nor write every
        # line once per previous call.
        logger.removeHandler(hdlr)
        hdlr.close()

    return
def ResCount(ifs):
    """
    Print chain, fragment, residue, water and atom counts per molecule.

    Every molecule read from ``ifs`` gets its own summary. Atoms that belong
    to water residues are not included in the atom count.
    """
    mol_count = 0
    mol = oechem.OEGraphMol()
    while oechem.OEReadMolecule(ifs, mol):
        mol_count += 1
        if not oechem.OEHasResidues(mol):
            oechem.OEPerceiveResidues(mol, oechem.OEPreserveResInfo_All)

        n_chains = n_frags = n_residues = n_waters = n_atoms = 0
        hv = oechem.OEHierView(mol)
        for chain in hv.GetChains():
            n_chains += 1
            for frag in chain.GetFragments():
                n_frags += 1
                for res in frag.GetResidues():
                    n_residues += 1
                    res_index = oechem.OEGetResidueIndex(res.GetOEResidue())
                    if res_index == oechem.OEResidueIndex_HOH:
                        n_waters += 1
                        continue
                    # only atoms of non-water residues are counted
                    n_atoms += sum(1 for _ in res.GetAtoms())

        print("===============================================")
        print("Molecule : %d Title: %s" % (mol_count, mol.GetTitle()))
        print("Chains   : %d" % n_chains)
        print("Fragments: %d" % n_frags)
        print("Residues : %d (%d waters)" % (n_residues, n_waters))
        print("Atoms    : %d" % n_atoms)
Exemple #4
0
def get_sequence(structure: oechem.OEGraphMol) -> str:
    """
    Build the one-letter amino acid sequence of a protein structure.

    Residues that are not recognized as standard protein residues are
    represented by the character 'X'.

    Parameters
    ----------
    structure: oechem.OEGraphMol
        An OpenEye molecule holding a protein structure.

    Returns
    -------
    sequence: str
        One character per residue, in the order of the hierarchy view.
    """

    def one_letter_code(hier_residue):
        # everything that is not a standard protein residue maps to 'X'
        if not oechem.OEIsStandardProteinResidue(hier_residue):
            return "X"
        residue_index = oechem.OEGetResidueIndex(hier_residue.GetResidueName())
        return oechem.OEGetAminoAcidCode(residue_index)

    hierview = oechem.OEHierView(structure)
    return "".join(
        one_letter_code(hier_residue) for hier_residue in hierview.GetResidues())
Exemple #5
0
def ResHist(ifs):
    """
    Print a residue-name histogram for every molecule read from a stream.

    For each molecule a header with its running number and title is printed,
    followed by one line per distinct residue name (sorted by name) giving
    the name, its number of occurrences and its percentage of all residues
    of that molecule.

    Parameters
    ----------
    ifs:
        Input molecule stream handed to ``oechem.OEReadMolecule``.
    """
    nrmol = 0
    mol = oechem.OEGraphMol()
    while oechem.OEReadMolecule(ifs, mol):
        nrmol += 1
        print("==============================")
        print("Molecule: %d Title: %s" % (nrmol, mol.GetTitle()))

        nrres = 0
        resmap = {}
        if not oechem.OEHasResidues(mol):
            oechem.OEPerceiveResidues(mol, oechem.OEPreserveResInfo_All)
        hv = oechem.OEHierView(mol)
        for res in hv.GetResidues():
            nrres += 1
            name = res.GetOEResidue().GetName()
            resmap[name] = resmap.get(name, 0) + 1

        # Report inside the read loop: every molecule gets its histogram and
        # an empty stream no longer reaches an undefined ``resmap``.
        # ``nrres`` cannot be zero here because the loop below only runs
        # when at least one residue was counted.
        for name in sorted(resmap):
            percent = 100.0 * float(resmap[name]) / float(nrres)
            print("%3s %3d  %4.1f %%" % (name, resmap[name], percent))
Exemple #6
0
def renumber_structure(target_structure: oechem.OEGraphMol,
                       residue_numbers: List[int]) -> oechem.OEGraphMol:
    """
    Renumber the residues of a protein structure according to the given list of residue numbers.

    Parameters
    ----------
    target_structure: oechem.OEGraphMol
        An OpenEye molecule holding the protein structure to renumber.
    residue_numbers: list of int
        A list of residue numbers matching the order of the target structure.

    Returns
    -------
    renumbered_structure: oechem.OEGraphMol
        A copy of the input structure with renumbered residues.

    Raises
    ------
    ValueError
        If the number of given residue numbers does not match the number of
        residues in the structure.
    """
    import copy

    renumbered_structure = copy.deepcopy(
        target_structure)  # don't touch input structure
    hierview = oechem.OEHierView(renumbered_structure)

    # zip() would silently stop at the shorter sequence and leave the
    # structure partially renumbered, so reject a mismatch up front.
    number_of_residues = sum(1 for _ in hierview.GetResidues())
    if number_of_residues != len(residue_numbers):
        raise ValueError(
            "Number of given residue numbers ({}) does not match the number "
            "of residues in the structure ({}).".format(
                len(residue_numbers), number_of_residues))

    for residue_number, structure_residue in zip(residue_numbers,
                                                 hierview.GetResidues()):
        structure_residue_mod = structure_residue.GetOEResidue()
        structure_residue_mod.SetResidueNumber(residue_number)
        # the modified residue record has to be written back to every atom
        for residue_atom in structure_residue.GetAtoms():
            oechem.OEAtomSetResidue(residue_atom, structure_residue_mod)

    return renumbered_structure
Exemple #7
0
def MakeAlpha(ifs, ofs):
    """
    Set backbone and side-chain torsions of every molecule and write it out.

    For each standard protein residue, phi and psi are set to -pi/3 and all
    chi torsions to pi; proline chi torsions are left alone. Bonds between
    two sulfur atoms are deleted beforehand. Each processed molecule is
    written to ``ofs`` and the number of torsions set is printed at the end.
    """
    phi_target = math.pi / -3.0
    psi_target = math.pi / -3.0
    chi_target = math.pi
    phi_set = 0
    psi_set = 0
    chi_set = 0

    mol = oechem.OEGraphMol()
    while oechem.OEReadMolecule(ifs, mol):
        if not oechem.OEHasResidues(mol):
            oechem.OEPerceiveResidues(mol, oechem.OEPreserveResInfo_All)

        # remove cross-links (sulfur-sulfur bonds)
        for bond in mol.GetBonds():
            starts_at_sulfur = bond.GetBgn().GetAtomicNum() == oechem.OEElemNo_S
            if starts_at_sulfur and bond.GetEnd().GetAtomicNum() == oechem.OEElemNo_S:
                mol.DeleteBond(bond)

        oechem.OEFindRingAtomsAndBonds(mol)
        hv = oechem.OEHierView(mol)
        for res in hv.GetResidues():
            if not oechem.OEIsStandardProteinResidue(res):
                continue

            # backbone: phi first, then psi
            if oechem.OESetTorsion(res, oechem.OEProtTorType_Phi, phi_target):
                phi_set += 1
            else:
                oeres = res.GetOEResidue()
                print("Unable to set phi for %s %d" %
                      (oeres.GetName(), oeres.GetResidueNumber()))

            if oechem.OESetTorsion(res, oechem.OEProtTorType_Psi, psi_target):
                psi_set += 1
            else:
                oeres = res.GetOEResidue()
                print("Unable to set psi for %s %d" %
                      (oeres.GetName(), oeres.GetResidueNumber()))

            # side chain: it does not make sense to set proline chi angles to 180
            residue_index = oechem.OEGetResidueIndex(res.GetOEResidue().GetName())
            if residue_index == oechem.OEResidueIndex_PRO:
                continue

            for chi in oechem.OEGetChis(res):
                if oechem.OESetTorsion(res, chi, chi_target):
                    chi_set += 1
                else:
                    oeres = res.GetOEResidue()
                    print("Unable to set chi %s for %s %d" %
                          (oechem.OEGetProteinTorsionName(chi),
                           oeres.GetName(), oeres.GetResidueNumber()))
        oechem.OEWriteMolecule(ofs, mol)

    print(phi_set, " phi  torsion angle set to ", phi_target * oechem.cvar.Rad2Deg)
    print(psi_set, " psi  torsion angle set to ", psi_target * oechem.cvar.Rad2Deg)
    print(chi_set, " chis torsion angle set to ", chi_target * oechem.cvar.Rad2Deg)
Exemple #8
0
def strip_water_ions(in_system):
    """
    Return a copy of the system without waters and single-atom residues.

    Parameters:
    ----------
    in_system : oechem.OEMol
        The bio-molecular system to clean

    Output:
    -------
    clean_system : oechem.OEMol
        The cleaned system

    """
    # Work on a copy so the caller's system is left untouched
    system = in_system.CreateCopy()

    # Atom mask with every bit switched on; bits of removed atoms get cleared
    keep_mask = oechem.OEBitVector(system.GetMaxAtomIdx())
    keep_mask.NegateBits()

    assumptions = (oechem.OEAssumption_BondedResidue +
                   oechem.OEAssumption_ResPerceived)
    hv = oechem.OEHierView(system, assumptions)

    for chain in hv.GetChains():
        for frag in chain.GetFragments():
            for hres in frag.GetResidues():
                oe_res = hres.GetOEResidue()

                # a residue made of a single atom is treated as a
                # mono atomic ion
                atom_count = sum(1 for _ in hres.GetAtoms())

                is_water = (oechem.OEGetResidueIndex(oe_res) ==
                            oechem.OEResidueIndex_HOH)
                if not (is_water or atom_count == 1):
                    continue

                for atom in hres.GetAtoms():
                    keep_mask.SetBitOff(atom.GetIdx())

    # Keep only the atoms whose bit is still set
    clean_system = oechem.OEMol()
    oechem.OESubsetMol(clean_system, system,
                       oechem.OEAtomIdxSelected(keep_mask))

    return clean_system
Exemple #9
0
def update_residue_identifiers(
        structure: oechem.OEGraphMol,
        keep_protein_residue_ids: bool = True,
        keep_chain_ids: bool = False) -> oechem.OEGraphMol:
    """
    Updates the residue and chain ids of the given molecular structure in place. By default all residues become part
    of chain A. Residue ids are assigned consecutively, except that protein residue ids are kept if requested. This is
    especially useful, if molecules were merged, which can result in overlapping atom and residue ids as well as
    separate chains.

    Parameters
    ----------
    structure: oechem.OEGraphMol
        The OpenEye molecule structure for updating atom and residue ids.
    keep_protein_residue_ids: bool
        If the residue ids of non-hetero (protein) residues should be kept.
    keep_chain_ids: bool
        If the chain ids should be kept. With the default ``False`` every residue is moved to chain A.

    Returns
    -------
    structure: oechem.OEGraphMol
        The OpenEye molecule structure with updated atom and residue ids.
    """
    # update residue ids
    residue_number = 0
    hierarchical_view = oechem.OEHierView(structure)
    for hv_residue in hierarchical_view.GetResidues():
        residue = hv_residue.GetOEResidue()
        if not keep_chain_ids:
            residue.SetChainID("A")
        if not residue.IsHetAtom() and keep_protein_residue_ids:
            if residue.GetName() == "NME" and residue.GetResidueNumber(
            ) == residue_number:
                # NME residues may have same id as preceding residue
                residue_number += 1
            else:
                # catch protein residue id if those should not be touched
                residue_number = residue.GetResidueNumber()

        else:
            # change residue id
            residue_number += 1
        residue.SetResidueNumber(residue_number)
        # write the modified residue record back to each of its atoms
        for atom in hv_residue.GetAtoms():
            oechem.OEAtomSetResidue(atom, residue)

    # update residue identifiers, except atom names, residue ids,
    # residue names, fragment number, chain id and record type
    # NOTE(review): identifiers not listed here (e.g. atom serial numbers)
    # are presumably re-assigned by OEPerceiveResidues - confirm.
    preserved_info = (oechem.OEPreserveResInfo_ResidueNumber
                      | oechem.OEPreserveResInfo_ResidueName
                      | oechem.OEPreserveResInfo_HetAtom
                      | oechem.OEPreserveResInfo_AtomName
                      | oechem.OEPreserveResInfo_FragmentNumber
                      | oechem.OEPreserveResInfo_ChainID)
    oechem.OEPerceiveResidues(structure, preserved_info)

    return structure
def LoopOverResAtoms(ims):
    """
    For every molecule of the stream, print the residue serial number and
    the atom name of each atom in residue LEU 27 of chain A.
    """
    for mol in ims.GetOEGraphMols():
        # @ <SNIPPET-PERCEIVE-RES>
        if not oechem.OEHasResidues(mol):
            oechem.OEPerceiveResidues(mol, oechem.OEPreserveResInfo_All)
        # @ </SNIPPET-PERCEIVE-RES>

        # @ <SNIPPET-RES-ATOMS-CORE>
        hier_view = oechem.OEHierView(mol)
        leu27 = hier_view.GetResidue("A", "LEU", 27)
        for res_atom in leu27.GetAtoms():  # only this residue's atoms
            atom_residue = oechem.OEAtomGetResidue(res_atom)
            print(atom_residue.GetSerialNumber(), res_atom.GetName())
Exemple #11
0
def test_assign_caps(package, resource, real_termini, caps):
    """Check that capping yields exactly the expected set of cap residues."""
    from openeye import oechem

    cap_names = ["ACE", "NME"]
    with resources.path(package, resource) as path:
        molecule = read_molecules(str(path))[0]
        molecule = select_altloc(molecule, "A")
        molecule = assign_caps(molecule, real_termini)
        hier_view = oechem.OEHierView(molecule)
        # collect the distinct cap residue names present after capping
        found_caps = set()
        for residue in hier_view.GetResidues():
            residue_name = residue.GetResidueName()
            if residue_name in cap_names:
                found_caps.add(residue_name)
        assert found_caps == caps
Exemple #12
0
def test_delete_partial_residues(package, resource, delete_backbone_C,
                                 sequence):
    """Check the sequence left after removing incomplete residues."""
    from openeye import oechem

    with resources.path(package, resource) as path:
        structure = read_molecules(str(path))[0]
        if delete_backbone_C:
            # identifier layout: three-letter residue name followed by its id
            residue_name = delete_backbone_C[:3]
            residue_id = int(delete_backbone_C[3:])
            hier_view = oechem.OEHierView(structure)
            hier_residue = hier_view.GetResidue("A", residue_name, residue_id)
            # drop the backbone carbon so the residue becomes incomplete
            for atom in hier_residue.GetAtoms():
                if atom.GetName().strip() == "C":
                    structure.DeleteAtom(atom)
        structure = delete_partial_residues(structure)
        assert get_sequence(structure) == sequence
Exemple #13
0
def test_renumber_structure(package, resource, residue_ids, expectation):
    """
    Compare results to have the given residue IDs.

    ``expectation`` is used as a context manager around the call under test
    and the assertions.
    """
    from openeye import oechem

    with resources.path(package, resource) as path:
        structure = read_molecules(str(path))[0]
        with expectation:
            structure = renumber_structure(structure, residue_ids)
            hierview = oechem.OEHierView(structure)
            new_residue_ids = [
                residue.GetResidueNumber()
                for residue in hierview.GetResidues()
            ]
            assert len(residue_ids) == len(new_residue_ids)
            # Compare pairwise; a list of ``True`` filtered by ``x == y`` is
            # always truthy for all() and therefore would check nothing.
            assert all(
                x == y for x, y in zip(residue_ids, new_residue_ids))
Exemple #14
0
        def residues(ls):
            """
            This function select residues based on the residue numbers. An example of
            selection can be:
            mask = 'resid A:16 17 19 B:1'

            ``ls`` is the token list of the selection. A token made only of
            digits is stored under the empty chain id ''. Any other token is
            taken as a chain id; the token after it is skipped and the one
            after that is parsed as the residue number.

            Returns the set of atom indexes of all selected residues.
            """
            # Set of residue atom indexes to be restrained
            res_atom_set = set()

            # Dictionary of lists with the chain residues selected to be restrained
            # e.g. {chainA:[res1, res15], chainB:[res19, res17]}
            chain_dic = {'': []}

            # Fill out the chain dictionary
            i = 0
            while i < len(ls):
                if ls[i].isdigit():
                    chain_dic[''].append(int(ls[i]))
                    i += 1
                else:
                    # setdefault creates the list for a chain seen for the
                    # first time; malformed input still raises
                    # ValueError / IndexError instead of being swallowed
                    chain_dic.setdefault(ls[i], []).append(int(ls[i + 2]))
                    i += 3

            # Loop over the molecular system to select the atom indexes to be selected
            hv = oechem.OEHierView(
                system, oechem.OEAssumption_BondedResidue +
                oechem.OEAssumption_ResPerceived)
            for chain in hv.GetChains():
                chain_id = chain.GetChainID()
                if chain_id not in chain_dic:
                    continue
                for frag in chain.GetFragments():
                    for hres in frag.GetResidues():
                        res_num = hres.GetOEResidue().GetResidueNumber()
                        if res_num not in chain_dic[chain_id]:
                            continue
                        for oe_at in hres.GetAtoms():
                            res_atom_set.add(oe_at.GetIdx())

            return res_atom_set
Exemple #15
0
def CalcResCounts(mol):
    """
    Print the number of chains, fragments, residues and water residues of
    a molecule, preceded by its title.
    """
    n_chains = n_frags = n_residues = n_waters = 0
    hier_view = oechem.OEHierView(mol)
    for chain in hier_view.GetChains():
        n_chains += 1
        for frag in chain.GetFragments():
            n_frags += 1
            for hres in frag.GetResidues():
                n_residues += 1
                res_index = oechem.OEGetResidueIndex(hres.GetOEResidue())
                if res_index == oechem.OEResidueIndex_HOH:
                    n_waters += 1

    print("Molecule : %s" % mol.GetTitle())
    print("Chains   : %d" % n_chains)
    print("Fragments: %d" % n_frags)
    print("Residues : %d (%d waters)" % (n_residues, n_waters))
Exemple #16
0
def SubSetRes(ifs, ofs, chainid, resname, resnum):
    """
    Extract one residue from every molecule of ``ifs`` and write it to ``ofs``.

    The residue is looked up by chain ID, residue name and residue number.
    The written molecule is titled "<resname> <resnum>" when ``chainid`` is
    a single blank, otherwise "<resname> <chainid> <resnum>".
    """
    adjust_h_count = True
    mol = oechem.OEGraphMol()
    while oechem.OEReadMolecule(ifs, mol):
        if not oechem.OEHasResidues(mol):
            oechem.OEPerceiveResidues(mol, oechem.OEPreserveResInfo_All)

        hier_view = oechem.OEHierView(mol)
        hier_res = hier_view.GetResidue(chainid, resname, resnum)
        if hier_res.GetOEResidue().GetName() is None:
            oechem.OEThrow.Fatal("Failed to find residue")

        # predicate selecting exactly the atoms of the requested residue
        in_residue = oechem.OEIsAtomMember(hier_res.GetAtoms())
        resmol = oechem.OEGraphMol()
        oechem.OESubsetMol(resmol, mol, in_residue, adjust_h_count)

        if chainid == " ":
            title = "%s %d" % (resname, resnum)
        else:
            title = "%s %s %d" % (resname, chainid, resnum)
        resmol.SetTitle(title)

        oechem.OEWriteMolecule(ofs, resmol)
Exemple #17
0
def ShowPhiPsi(ifs):
    """
    Print phi and psi of every standard protein residue of each molecule.

    One header per molecule is followed by one line per residue with the
    residue name, chain ID, residue number and the two values returned by
    ``oechem.OEGetPhi`` and ``oechem.OEGetPsi``.
    """
    mol_count = 0
    mol = oechem.OEGraphMol()
    while oechem.OEReadMolecule(ifs, mol):
        mol_count += 1
        print("================================================")
        print("Molecule: %d  Title: %s" % (mol_count, mol.GetTitle()))

        if not oechem.OEHasResidues(mol):
            oechem.OEPerceiveResidues(mol, oechem.OEPreserveResInfo_All)

        hier_view = oechem.OEHierView(mol)
        for hier_res in hier_view.GetResidues():
            if oechem.OEIsStandardProteinResidue(hier_res):
                phi = oechem.OEGetPhi(hier_res)
                psi = oechem.OEGetPsi(hier_res)

                oe_res = hier_res.GetOEResidue()
                fields = (oe_res.GetName(), oe_res.GetChainID(),
                          oe_res.GetResidueNumber(), phi, psi)
                print("  %s %s %d (PHI=%.2f, PSI=%.2f)" % fields)
Exemple #18
0
def test_update_residue_identifiers(
    package,
    resource,
    keep_protein_residue_ids,
    keep_chain_id,
    chain_ids,
    first_residue_id,
    last_residue_id,
):
    """
    Check the chain IDs present, that the first atom has serial number 1 and the smallest and largest residue ID.
    """
    from openeye import oechem

    with resources.path(package, resource) as path:
        loaded_structure = read_molecules(str(path))[0]
        structure = update_residue_identifiers(
            loaded_structure,
            keep_protein_residue_ids=keep_protein_residue_ids,
            keep_chain_ids=keep_chain_id,
        )
        hierview = oechem.OEHierView(structure)

        # chain IDs: only the set of distinct IDs matters
        found_chain_ids = {chain.GetChainID() for chain in hierview.GetChains()}
        assert found_chain_ids == set(chain_ids)

        # atom numbering starts with one
        first_atom = structure.GetAtoms().next()
        assert oechem.OEAtomGetResidue(first_atom).GetSerialNumber() == 1

        # smallest and largest residue ID
        residue_ids = []
        for residue in hierview.GetResidues():
            residue_ids.append(residue.GetResidueNumber())
        assert min(residue_ids) == first_residue_id
        assert max(residue_ids) == last_residue_id
Exemple #19
0
def oemol_to_openmmTop(mol):
    """
    This function converts an OEMol to an openmm topology
    The OEMol coordinates are assumed to be in Angstrom unit

    Bonds whose type string is empty get a type assigned on the OEMol
    itself ("Aromatic", "Double", "Triple", "Amide" or "Single"), so the
    passed molecule may be modified.

    Parameters:
    -----------
    mol: OEMol molecule
        The molecule to convert

    Return:
    -------
    topology : OpenMM Topology
        The generated OpenMM topology
    positions : OpenMM Quantity
        The molecule atom positions associated with the
        generated topology in Angstrom units

    Raises:
    -------
    ValueError
        If the number of atoms or bonds in the generated topology differs
        from the OEMol
    """
    # OE Hierarchical molecule view
    hv = oechem.OEHierView(
        mol, oechem.OEAssumption_BondedResidue +
        oechem.OEAssumption_ResPerceived + oechem.OEAssumption_PDBOrder)

    # Create empty OpenMM Topology
    topology = app.Topology()
    # Dictionary used to map oe atoms to openmm atoms
    oe_atom_to_openmm_at = {}

    for chain in hv.GetChains():

        # Create empty OpenMM Chain
        openmm_chain = topology.addChain(chain.GetChainID())

        for frag in chain.GetFragments():

            for hres in frag.GetResidues():

                # Get OE residue
                oe_res = hres.GetOEResidue()
                # Create OpenMM residue
                openmm_res = topology.addResidue(oe_res.GetName(),
                                                 openmm_chain)

                for oe_at in hres.GetAtoms():
                    # Select atom element based on the atomic number
                    element = app.element.Element.getByAtomicNumber(
                        oe_at.GetAtomicNum())
                    # Add atom OpenMM atom to the topology
                    openmm_at = topology.addAtom(oe_at.GetName(), element,
                                                 openmm_res)
                    # Keep the OE atom index on the OpenMM atom
                    openmm_at.index = oe_at.GetIdx()
                    # Add atom to the mapping dictionary
                    oe_atom_to_openmm_at[oe_at] = openmm_at

    if topology.getNumAtoms() != mol.NumAtoms():
        raise ValueError(
            "OpenMM topology and OEMol number of atoms mismatching: "
            "OpenMM = {} vs OEMol  = {}".format(topology.getNumAtoms(),
                                                mol.NumAtoms()))

    # Count the number of bonds in the openmm topology
    omm_bond_count = 0

    def IsAmideBond(oe_bond):

        # This supporting function checks if the passed bond is an amide bond or not.
        # Our definition of amide bond C-N between a Carbon and a Nitrogen atom is:
        #          O
        #          ║
        #  CA or O-C-N-
        #            |

        # The amide bond C-N is a single bond
        if oe_bond.GetOrder() != 1:
            return False

        atomB = oe_bond.GetBgn()
        atomE = oe_bond.GetEnd()

        # The amide bond is made by Carbon and Nitrogen atoms
        if not (atomB.IsCarbon() and atomE.IsNitrogen() or
                (atomB.IsNitrogen() and atomE.IsCarbon())):
            return False

        # Select Carbon and Nitrogen atoms
        if atomB.IsCarbon():
            C_atom = atomB
            N_atom = atomE
        else:
            C_atom = atomE
            N_atom = atomB

        # Carbon and Nitrogen atoms must have 3 neighbour atoms
        if not (C_atom.GetDegree() == 3 and N_atom.GetDegree() == 3):
            return False

        double_bonds = 0
        single_bonds = 0

        for bond in C_atom.GetBonds():
            # The C-O bond can be single or double.
            if (bond.GetBgn() == C_atom and bond.GetEnd().IsOxygen()) or \
                    (bond.GetBgn().IsOxygen() and bond.GetEnd() == C_atom):
                if bond.GetOrder() == 2:
                    double_bonds += 1
                if bond.GetOrder() == 1:
                    single_bonds += 1
            # The CA-C bond is single
            if (bond.GetBgn() == C_atom and bond.GetEnd().IsCarbon()) or \
                    (bond.GetBgn().IsCarbon() and bond.GetEnd() == C_atom):
                if bond.GetOrder() == 1:
                    single_bonds += 1
        # Just one double and one single bonds are connected to C
        # In this case the bond is an amide bond
        return double_bonds == 1 and single_bonds == 1

    # Bond type strings passed through unchanged to the OpenMM topology
    known_bond_types = ('Single', 'Double', 'Triple', 'Aromatic', 'Amide')

    # Creating bonds
    for oe_bond in mol.GetBonds():

        omm_bond_count += 1

        # Set the bond type
        # Compare by value: an identity test against a string literal
        # depends on interpreter string interning
        if oe_bond.GetType() != "":
            if oe_bond.GetType() in known_bond_types:
                omm_bond_type = oe_bond.GetType()
            else:
                omm_bond_type = None
        else:
            # No type stored on the bond: derive it and store it on the bond
            if oe_bond.IsAromatic():
                oe_bond.SetType("Aromatic")
                omm_bond_type = "Aromatic"
            elif oe_bond.GetOrder() == 2:
                oe_bond.SetType("Double")
                omm_bond_type = "Double"
            elif oe_bond.GetOrder() == 3:
                oe_bond.SetType("Triple")
                omm_bond_type = "Triple"
            elif IsAmideBond(oe_bond):
                # must be tested before the generic single bond case
                oe_bond.SetType("Amide")
                omm_bond_type = "Amide"
            elif oe_bond.GetOrder() == 1:
                oe_bond.SetType("Single")
                omm_bond_type = "Single"
            else:
                omm_bond_type = None

        topology.addBond(oe_atom_to_openmm_at[oe_bond.GetBgn()],
                         oe_atom_to_openmm_at[oe_bond.GetEnd()],
                         type=omm_bond_type,
                         order=oe_bond.GetOrder())

    if omm_bond_count != mol.NumBonds():
        raise ValueError(
            "OpenMM topology and OEMol number of bonds mismatching: "
            "OpenMM = {} vs OEMol  = {}".format(omm_bond_count,
                                                mol.NumBonds()))

    # Positions follow the iteration order of the coordinate dictionary
    dic = mol.GetCoords()
    positions = [Vec3(v[0], v[1], v[2])
                 for v in dic.values()] * unit.angstrom

    return topology, positions
Exemple #20
0
def oesolvate(solute,
              density=1.0,
              padding_distance=10.0,
              distance_between_atoms=2.5,
              solvents='tip3p',
              molar_fractions='1.0',
              geometry='box',
              close_solvent=True,
              salt='[Na+], [Cl-]',
              salt_concentration=0.0,
              neutralize_solute=True,
              verbose=False,
              return_components=False,
              **kargs):
    """
    This function solvates the passed solute in a cubic box or a sphere by using Packmol. Packmol
    creates an initial point for molecular dynamics simulations by packing molecules in defined
    regions of space. For additional info:
    http://www.ime.unicamp.br/~martinez/packmol/home.shtml

    The geometry volume is estimated by using the padding parameter and the solute size.
    The number of solvent molecules is calculated by using the specified density and volume.
    Solvent molecules are specified as comma separated smiles strings. The molar fractions
    of each solvent molecule are specified in a similar fashion. By default if the solute is
    charged counter ions are added to neutralize it

    Parameters:
    -----------
    solute: OEMol molecule
        The solute to solvate
    density: float
        The solution density in g/ml
    padding_distance: float
        The largest dimension of the solute (along the x, y, or z axis) is determined (in A),
        and a cubic box of size (largest dimension)+2*padding is used
    distance_between_atoms: float
        The minimum distance between atoms in A
    solvents: python string
        A comma separated smiles string or keywords for the solvent molecules.
        Special water models can be selected by using the keywords:
        tip3p for TIP3P water model geometry
    molar_fractions: python string
        A comma separated molar fraction string of the solvent molecules
    geometry: python string
        The shape of the solvated region: 'box' or 'sphere'. For a sphere the
        diameter is the edge that would have been used for the cubic box
    close_solvent: boolean
        If True solvent molecules will be placed very close to the solute
    salt: python string
        A comma separated string of the dissociated salt in solution
    salt_concentration: float
        Salt concentration in millimolar
    neutralize_solute: boolean
        If True counter-ions will be added to the solution to neutralize the solute
    verbose: Bool
        If True verbose mode is enabled (the Packmol output is not silenced)
    return_components: Bool
        If True the added solvent molecules are also returned as OEMol
    **kargs:
        Accepted for call compatibility; not used by this function

    Return:
    -------
    oe_mol: OEMol
        The solvated system. If the selected geometry is a box a generic data
        tag with name 'box_vectors' is attached to the output molecule
        containing the encoded system box vectors.
    oe_mol_components: OEMol
        If the return_components flag is True the added solvent molecules are
        returned as an additional OEMol

    Raises:
    -------
    IOError
        If the packmol executable is not found or the tip3p file cannot be read
    ValueError
        On inconsistent solvent/fraction input, unsupported geometry, SMILES
        parsing or conformer generation failures, multi-conformer solutes,
        non-positive solvent counts or a final density that differs from the
        requested one by 0.1 g/ml or more
    subprocess.CalledProcessError
        If Packmol exits with a non-zero status
    """
    def BoundingBox(molecule):
        """
        This function calculates the Bounding Box of the passed
        molecule

        molecule: OEMol

        return: bb (numpy array)
            the calculated bounding box is returned as numpy array:
            [(xmin,ymin,zmin), (xmax,ymax,zmax)]
        """
        np_coords = np.array(list(molecule.GetCoords().values()))
        return np.array([np_coords.min(axis=0), np_coords.max(axis=0)])

    def write_temp_pdb(molecule, prefix=''):
        """
        Write the molecule to a uniquely named .pdb file in the current
        working directory and return the file name. The stream is closed
        explicitly so the file is complete before Packmol reads it.
        """
        pdb_name = prefix + os.path.basename(tempfile.mktemp(suffix='.pdb'))
        ofs = oechem.oemolostream(pdb_name)
        oechem.OEWriteConstMolecule(ofs, molecule)
        ofs.close()
        return pdb_name

    def register_residue_names(molecule):
        """
        Perceive the residues of a salt/solvent template molecule and record
        the molecule in solvent_resid_dic_names under its residue name. Atoms
        whose perceived residue name is 'UNL' are renamed with a random three
        letter code.
        """
        # Unique 3 code letter are set as solvent residue names
        solv_id = ''.join(random.sample(char_set * 3, 3))

        # Try to recognize the residue name
        oechem.OEPerceiveResidues(molecule)

        for template_atom in molecule.GetAtoms():
            res = oechem.OEAtomGetResidue(template_atom)
            if res.GetName() == 'UNL':
                res.SetName(solv_id)
                oechem.OEAtomSetResidue(template_atom, res)
                solvent_resid_dic_names.setdefault(solv_id, molecule)
            else:
                # A recognized residue name: register it once and stop
                solvent_resid_dic_names.setdefault(res.GetName(), molecule)
                break

    if shutil.which("packmol") is None:
        raise IOError("Packmol executable not found")

    # Extract solvent smiles strings and mole fractions
    solvents = [sm.strip() for sm in solvents.split(',')]
    fractions = [float(mf) for mf in molar_fractions.split(',')]

    # If the smiles string and mole fractions lists have different lengths raise an error
    if len(solvents) != len(fractions):
        raise ValueError(
            "Selected solvent number and selected molar fraction number mismatch: {} vs {}"
            .format(len(solvents), len(fractions)))

    # Remove smiles string with 0.0 mole fraction
    solvent_smiles = [sm for sm, mf in zip(solvents, fractions) if mf]
    mol_fractions = [mf for mf in fractions if mf]

    # Mole fractions are non-negative numbers
    if any(mf < 0.0 for mf in mol_fractions):
        raise ValueError("Error: Mole fractions are non-negative real numbers")

    # Mole fractions must sum up to 1.0
    # NOTE(review): this condition is reported through OEThrow.Error while the
    # neighbouring checks raise ValueError -- confirm this is intended
    if abs(sum(mol_fractions) - 1.0) > 0.001:
        oechem.OEThrow.Error("Error: Mole fractions do not sum up to 1.0")

    if geometry not in ['box', 'sphere']:
        raise ValueError(
            "Error geometry: the supported geometries are box and sphere not {}"
            .format(geometry))

    # Set Units
    density = density * unit.grams / unit.milliliter
    padding_distance = padding_distance * unit.angstrom
    salt_concentration = salt_concentration * unit.millimolar

    # Calculate the Solute Bounding Box
    BB_solute = BoundingBox(solute)

    # Estimate of the box cube length
    box_edge = 2.0 * padding_distance + np.max(BB_solute[1] -
                                               BB_solute[0]) * unit.angstrom

    if geometry == 'box':
        # Box Volume
        volume = box_edge**3
    else:
        # Sphere with diameter equal to the box edge
        volume = (4.0 / 3.0) * 3.14159265 * (0.5 * box_edge)**3

    # Omega engine is used to generate conformations
    omegaOpts = oeomega.OEOmegaOptions()
    omegaOpts.SetMaxConfs(1)
    omegaOpts.SetStrictStereo(False)
    omega = oeomega.OEOmega(omegaOpts)

    # Create a string code to identify the solute residues. The code ID used is based
    # on the residue number id, the residue name and the chain id:
    # id+resname+chainID
    hv_solute = oechem.OEHierView(
        solute,
        oechem.OEAssumption_BondedResidue + oechem.OEAssumption_ResPerceived)
    solute_resid_list = []
    for chain in hv_solute.GetChains():
        for frag in chain.GetFragments():
            for hres in frag.GetResidues():
                oe_res = hres.GetOEResidue()
                solute_resid_list.append(
                    str(oe_res.GetResidueNumber()) + oe_res.GetName() +
                    chain.GetChainID())

    # Map: residue name -> template molecule of the added component
    solvent_resid_dic_names = dict()

    # Alphabet used to build the random residue names of unknown components
    char_set = string.ascii_uppercase

    # Neutralize solute
    ion_sum_wgt_n_ions = 0.0 * unit.grams / unit.mole
    if neutralize_solute:
        # Container for the counter-ions
        oe_ions = []
        # Container for the ion smiles strings
        ions_smiles = []
        solute_formal_charge = sum(at.GetFormalCharge()
                                   for at in solute.GetAtoms())
        if solute_formal_charge > 0:
            ions_smiles.append("[Cl-]")
        elif solute_formal_charge < 0:
            ions_smiles.append("[Na+]")

        # Total number of counter-ions to neutralize the solute
        n_ions = abs(solute_formal_charge)

        # Ions
        if n_ions >= 1:
            for sm in ions_smiles:
                mol = oechem.OEMol()
                if not oechem.OESmilesToMol(mol, sm):
                    raise ValueError(
                        "Error counter ions: SMILES string parsing fails for the string: {}"
                        .format(sm))

                # Generate conformer
                if not omega(mol):
                    raise ValueError(
                        "Error counter ions: Conformer generation fails for the molecule with "
                        "smiles string: {}".format(sm))

                oe_ions.append(mol)

                if sm == '[Na+]':
                    solvent_resid_dic_names[' NA'] = mol
                else:
                    solvent_resid_dic_names[' CL'] = mol

            ion_sum_wgt = 0.0 * unit.grams / unit.mole
            for ion in oe_ions:
                # Molecular weight
                ion_sum_wgt += oechem.OECalculateMolecularWeight(
                    ion) * unit.grams / unit.mole

            ion_sum_wgt_n_ions = ion_sum_wgt * n_ions

            # Create ions .pdb files
            ions_smiles_pdbs = [
                write_temp_pdb(ion, prefix=sm + '_')
                for sm, ion in zip(ions_smiles, oe_ions)
            ]

    # Add salts to the solution
    salt_sum_wgt_n_salt = 0.0 * unit.grams / unit.mole
    if salt_concentration > 0.0 * unit.millimolar:

        salt_smiles = [sm.strip() for sm in salt.split(',')]

        # Container list of oemol salt molecules generated by using smiles strings
        oe_salt = []

        for sm in salt_smiles:
            mol_salt = oechem.OEMol()
            if not oechem.OESmilesToMol(mol_salt, sm):
                raise ValueError(
                    "Error salt: SMILES string parsing fails for the string: {}"
                    .format(sm))

            # Generate conformer
            if not omega(mol_salt):
                raise ValueError(
                    "Error salt: Conformer generation fails for the "
                    "molecule with smiles string: {}".format(sm))

            register_residue_names(mol_salt)

            oe_salt.append(mol_salt)

        n_salt = int(
            round(unit.AVOGADRO_CONSTANT_NA * salt_concentration *
                  volume.in_units_of(unit.liter)))

        salt_sum_wgt = 0.0 * unit.grams / unit.mole
        for salt_mol in oe_salt:
            # Molecular weight
            salt_sum_wgt += oechem.OECalculateMolecularWeight(
                salt_mol) * unit.grams / unit.mole

        salt_sum_wgt_n_salt = salt_sum_wgt * n_salt

        # Create salt .pdb files
        if n_salt >= 1:
            salt_pdbs = [write_temp_pdb(salt_mol) for salt_mol in oe_salt]

    # Container list of oemol solvent molecules generated by using smiles strings
    oe_solvents = []

    for sm in solvent_smiles:

        mol_sol = oechem.OEMol()

        if sm == 'tip3p':
            tip3p_fn = os.path.join(PACKAGE_DIR, 'oeommtools', 'data',
                                    'tip3p.pdb')
            ifs = oechem.oemolistream(tip3p_fn)

            if not oechem.OEReadMolecule(ifs, mol_sol):
                raise IOError(
                    "It was not possible to read the tip3p molecule file")
        else:
            if not oechem.OESmilesToMol(mol_sol, sm):
                raise ValueError(
                    "Error solvent: SMILES string parsing fails for the string: {}"
                    .format(sm))

            # Generate conformer
            if not omega(mol_sol):
                raise ValueError(
                    "Error solvent: Conformer generation fails for "
                    "the molecule with smiles string: {}".format(sm))

        register_residue_names(mol_sol)

        oe_solvents.append(mol_sol)

    # Sum of the solvent molecular weights
    solvent_sum_wgt_frac = 0.0 * unit.grams / unit.mole

    for mol_sol, mol_fraction in zip(oe_solvents, mol_fractions):
        # Molecular weight
        wgt = oechem.OECalculateMolecularWeight(
            mol_sol) * unit.grams / unit.mole
        solvent_sum_wgt_frac += wgt * mol_fraction

    # Solute molecular weight
    solute_wgt = oechem.OECalculateMolecularWeight(
        solute) * unit.gram / unit.mole

    # Estimate of the number of each molecular species present in the solution accordingly
    # to their molar fraction fi:
    #
    # ni = fi*(density*volume*NA - wgt_solute - sum_k(wgt_salt_k*nk) - wgt_ion*n_ion)/sum_j(wgt_nj * fj)
    #
    # where ni is the number of molecule of specie i, density the mixture density, volume the
    # mixture volume, wgt_solute the molecular weight of the solute, wgt_salt_k the molecular
    # weight of the salt component k, nk the number of molecule of salt component k, wgt_ion
    # the counter ion molecular weight, n_ions the number of counter ions and wgt_nj the molecular
    # weight of the molecule specie j with molar fraction fj

    div = (unit.AVOGADRO_CONSTANT_NA * density * volume -
           (solute_wgt + salt_sum_wgt_n_salt +
            ion_sum_wgt_n_ions)) / solvent_sum_wgt_frac

    # Solvent number of monomers
    n_monomers = [int(round(mf * div)) for mf in mol_fractions]

    if not all(nm > 0 for nm in n_monomers):
        raise ValueError(
            "Error negative number of solvent components: the density could be too low"
        )

    # Packmol Configuration file setting
    if close_solvent:
        header_template = """\n# Mixture\ntolerance {}\nfiletype pdb\noutput {}\nadd_amber_ter\navoid_overlap no"""
    else:
        header_template = """\n# Mixture\ntolerance {}\nfiletype pdb\noutput {}\nadd_amber_ter\navoid_overlap yes"""

    # Templates strings
    solute_template = """\n\n# Solute\nstructure {}\nnumber 1\nfixed 0. 0. 0. 0. 0. 0.\nresnumbers 1\nend structure"""

    if geometry == 'box':
        solvent_template = """\nstructure {}\nnumber {}\ninside box {:0.3f} {:0.3f} {:0.3f} {:0.3f} {:0.3f} {:0.3f}\
        \nchain !\nresnumbers 3\nend structure"""
    else:
        solvent_template = """\nstructure {}\nnumber {}\ninside sphere {:0.3f} {:0.3f} {:0.3f} {:0.3f}\
        \nchain !\nresnumbers 3\nend structure"""

    # Create solvents .pdb files
    solvent_pdbs = [write_temp_pdb(mol_sol) for mol_sol in oe_solvents]

    if solute.GetMaxConfIdx() > 1:
        raise ValueError("Solutes with multiple conformers are not supported")

    solute_pdb = write_temp_pdb(solute, prefix='solute' + '_')

    # Write Packmol header section
    mixture_pdb = 'mixture' + '_' + os.path.basename(
        tempfile.mktemp(suffix='.pdb'))
    body = header_template.format(distance_between_atoms, mixture_pdb)
    # Write Packmol configuration file solute section
    body += solute_template.format(solute_pdb)

    # The solute is centered inside the box
    xc = (BB_solute[0][0] + BB_solute[1][0]) / 2.
    yc = (BB_solute[0][1] + BB_solute[1][1]) / 2.
    zc = (BB_solute[0][2] + BB_solute[1][2]) / 2.

    # Correct for periodic box conditions to avoid
    # steric clashes at the box edges
    pbc_correction = 1.0 * unit.angstrom

    if geometry == 'box':
        half_edge = ((box_edge - pbc_correction) / 2.) / unit.angstrom
        # xmin, ymin, zmin, xmax, ymax, zmax
        region_args = (xc - half_edge, yc - half_edge, zc - half_edge,
                       xc + half_edge, yc + half_edge, zc + half_edge)
    else:
        # center and radius
        region_args = (xc, yc, zc, 0.5 * box_edge / unit.angstrom)

    # Packmol setting for the solvent section
    body += '\n\n# Solvent'
    for pdb_name, n_monomer in zip(solvent_pdbs, n_monomers):
        body += solvent_template.format(pdb_name, n_monomer, *region_args)

    # Packmol setting for the salt section
    if salt_concentration > 0.0 * unit.millimolar and n_salt >= 1:
        body += '\n\n# Salt'
        for pdb_name in salt_pdbs:
            body += solvent_template.format(pdb_name, n_salt, *region_args)

    # Packmol setting for the ions section
    if neutralize_solute and n_ions >= 1:
        body += '\n\n# Counter Ions'
        for pdb_name in ions_smiles_pdbs:
            body += solvent_template.format(pdb_name, n_ions, *region_args)

    # Packmol configuration file
    packmol_filename = os.path.basename(tempfile.mktemp(suffix='.inp'))

    with open(packmol_filename, 'w') as file_handle:
        file_handle.write(body)

    # Call Packmol feeding the configuration file on stdin
    with open(packmol_filename, 'r') as file_handle:
        if verbose:
            subprocess.check_call(['packmol'], stdin=file_handle)
        else:
            subprocess.check_call(['packmol'],
                                  stdin=file_handle,
                                  stdout=subprocess.DEVNULL,
                                  stderr=subprocess.DEVNULL)

    # Read in the Packmol solvated system
    solvated = oechem.OEMol()

    if os.path.exists(mixture_pdb + '_FORCED'):
        os.rename(mixture_pdb + '_FORCED', mixture_pdb)
        print("Warning: Packing solution is not optimal")

    ifs = oechem.oemolistream(mixture_pdb)
    oechem.OEReadMolecule(ifs, solvated)
    ifs.close()

    # To avoid to change the user oemol starting solute by reading in
    # the generated mixture pdb file and loosing molecule info, the
    # solvent molecules are extracted from the mixture system and
    # added back to the starting solute

    # Extract from the solution system the solvent molecules
    # by checking the previous solute generated ID: id+resname+chainID
    hv_solvated = oechem.OEHierView(
        solvated,
        oechem.OEAssumption_BondedResidue + oechem.OEAssumption_ResPerceived)

    # This molecule will hold the solvent molecules generated directly from
    # the omega conformers. This is useful to avoid problems related to read in
    # the solvent molecules from pdb files and triggering unwanted perceiving actions
    new_components = oechem.OEMol()

    bv = oechem.OEBitVector(solvated.GetMaxAtomIdx())
    for chain in hv_solvated.GetChains():
        for frag in chain.GetFragments():
            for hres in frag.GetResidues():
                oe_res = hres.GetOEResidue()
                res_code = str(oe_res.GetResidueNumber()) + oe_res.GetName(
                ) + chain.GetChainID()
                if res_code not in solute_resid_list:
                    oechem.OEAddMols(new_components,
                                     solvent_resid_dic_names[oe_res.GetName()])
                    for at in hres.GetAtoms():
                        bv.SetBitOn(at.GetIdx())

    pred = oechem.OEAtomIdxSelected(bv)
    components = oechem.OEMol()
    oechem.OESubsetMol(components, solvated, pred)

    # Move the template copies onto the positions found by Packmol
    new_components.SetCoords(components.GetCoords())

    # This is necessary otherwise just one big residue is created
    oechem.OEPerceiveResidues(new_components)

    # Add the solvent molecules to the solute copy
    solvated_system = solute.CreateCopy()
    oechem.OEAddMols(solvated_system, new_components)

    # Set Title
    solvated_system.SetTitle(solute.GetTitle())

    # Set ions resname to Na+ and Cl-. The updated residue must be written
    # back to the atom being visited in the solvated system
    ion_res_names = {' NA': "Na+", ' CL': "Cl-"}
    for at in solvated_system.GetAtoms():
        res = oechem.OEAtomGetResidue(at)
        new_name = ion_res_names.get(res.GetName())
        if new_name is not None:
            res.SetName(new_name)
            oechem.OEAtomSetResidue(at, res)

    # Cleaning
    to_delete = solvent_pdbs + [packmol_filename, solute_pdb, mixture_pdb]

    if salt_concentration > 0.0 * unit.millimolar and n_salt >= 1:
        to_delete += salt_pdbs
    if neutralize_solute and n_ions >= 1:
        to_delete += ions_smiles_pdbs

    for fn in to_delete:
        # Best effort: a temporary file that cannot be removed is not an error
        try:
            os.remove(fn)
        except OSError:
            pass

    # Calculate the solution total density
    total_wgt = oechem.OECalculateMolecularWeight(
        solvated_system) * unit.gram / unit.mole
    density_mix = (1 / unit.AVOGADRO_CONSTANT_NA) * total_wgt / volume
    print("Computed Solution Density = {}".format(
        density_mix.in_units_of(unit.gram / unit.milliliter)))
    # Threshold checking
    ths = 0.1 * unit.gram / unit.milliliter
    if not abs(density -
               density_mix.in_units_of(unit.gram / unit.milliliter)) < ths:
        raise ValueError(
            "Error: the computed density for the solute {} does not match the selected density {} vs {}"
            .format(solute.GetTitle(), density_mix, density))

    if geometry == 'box':
        # Define the box vector and attached it as SD tag to the solvated system
        # with ID tag: 'box_vectors'
        box_vectors = (Vec3(box_edge / unit.angstrom, 0.0,
                            0.0), Vec3(0.0, box_edge / unit.angstrom, 0.0),
                       Vec3(0.0, 0.0,
                            box_edge / unit.angstrom)) * unit.angstrom

        box_vectors = data_utils.encodePyObj(box_vectors)
        solvated_system.SetData(oechem.OEGetTag('box_vectors'), box_vectors)

    if return_components:
        new_components.SetTitle(solute.GetTitle() + '_solvent_comp')
        return solvated_system, new_components

    return solvated_system
Exemple #21
0
def applyffExcipients(excipients, opt):
    """
    This function applies the selected force field to the
    excipients. Residues matched by the protein force field are
    parametrized with it; every residue name that is not matched is
    parametrized once with the 'other' force field and the resulting
    structure is replicated for each occurrence.

    Parameters:
    -----------
    excipients: OEMol molecule
        The excipients molecules to parametrize
    opt: python dictionary
        The options used to parametrize the excipients. The keys read
        here are 'protein_forcefield', 'other_forcefield' and
        'prefix_name'

    Return:
    -------
    excipient_structure: Parmed structure instance
        The parametrized excipient parmed structure. When some residues
        are unrecognized they are placed first, followed by the
        recognized ones
    """
    from itertools import groupby

    # OpenMM topology and positions from OEMol
    topology, positions = oeommutils.oemol_to_openmmTop(excipients)

    # Try to apply the selected FF on the excipients
    forcefield = app.ForceField(opt['protein_forcefield'])

    # Unique names of the excipients unrecognized by the protein FF
    templates = {res.name for res in forcefield.getUnmatchedResidues(topology)}

    if not templates:  # All the excipients are recognized by the selected FF
        omm_system = forcefield.createSystem(topology, rigidWater=False)
        return parmed.openmm.load_topology(topology, omm_system, xyz=positions)

    # Some excipients are not recognized
    oechem.OEThrow.Info("The following excipients are not recognized "
                        "by the protein FF: {}"
                        "\nThey will be parametrized by using the FF: {}".format(templates, opt['other_forcefield']))

    # Create a bit vector mask used to split recognized from un-recognize excipients.
    # All the bits start on; the atoms of the unrecognized residues are switched off
    bv = oechem.OEBitVector(excipients.GetMaxAtomIdx())
    bv.NegateBits()

    # Dictionary containing the name and the parmed structures of the unrecognized excipients
    unrc_excipient_structures = {}

    # Dictionary used to skip already selected unrecognized excipients and count them
    unmatched_excp = dict.fromkeys(templates, 0)

    # Ordered list of the unrecognized excipients
    unmatched_res_order = []

    hv = oechem.OEHierView(excipients)

    for chain in hv.GetChains():
        for frag in chain.GetFragments():
            for hres in frag.GetResidues():
                r_name = hres.GetOEResidue().GetName()
                if r_name not in unmatched_excp:
                    continue

                unmatched_res_order.append(r_name)

                if unmatched_excp[r_name]:
                    # The unknown excipient has already been parametrized:
                    # just mask its atoms and count the occurrence
                    atms = hres.GetAtoms()
                    for at in atms:
                        bv.SetBitOff(at.GetIdx())
                    unmatched_excp[r_name] += 1
                    continue

                unmatched_excp[r_name] = 1
                #  Create AtomBondSet to extract from the whole excipient system
                #  the current selected FF unknown excipient
                atms = hres.GetAtoms()
                bond_set = set()
                for at in atms:
                    bv.SetBitOff(at.GetIdx())
                    bonds = at.GetBonds()
                    for bond in bonds:
                        bond_set.add(bond)
                atom_bond_set = oechem.OEAtomBondSet(atms)
                for bond in bond_set:
                    atom_bond_set.AddBond(bond)

                # Create the unrecognized excipient OEMol
                unrc_excp = oechem.OEMol()
                if not oechem.OESubsetMol(unrc_excp, excipients, atom_bond_set):
                    oechem.OEThrow.Fatal("Is was not possible extract the residue: {}".format(r_name))

                # Charge the unrecognized excipient
                if not oequacpac.OEAssignCharges(unrc_excp,
                                                 oequacpac.OEAM1BCCCharges(symmetrize=True)):
                    oechem.OEThrow.Fatal("Is was not possible to "
                                         "charge the extract residue: {}".format(r_name))

                # If GAFF or GAFF2 is selected as FF check for tleap command
                # NOTE(review): checkTleap is accessed but not called here; this only
                # performs a check if it is a property -- confirm against ff_utils
                if opt['other_forcefield'] in ['GAFF', 'GAFF2']:
                    ff_utils.ParamLigStructure(oechem.OEMol(), opt['other_forcefield']).checkTleap

                if opt['other_forcefield'] == 'SMIRNOFF':
                    unrc_excp = oeommutils.sanitizeOEMolecule(unrc_excp)

                # Parametrize the unrecognized excipient by using the selected FF
                pmd = ff_utils.ParamLigStructure(unrc_excp, opt['other_forcefield'],
                                                 prefix_name=opt['prefix_name']+'_'+r_name)
                unrc_excp_struc = pmd.parameterize()
                unrc_excp_struc.residues[0].name = r_name
                unrc_excipient_structures[r_name] = unrc_excp_struc

    # Recognized FF excipients
    pred_rec = oechem.OEAtomIdxSelected(bv)
    rec_excp = oechem.OEMol()
    oechem.OESubsetMol(rec_excp, excipients, pred_rec)

    if rec_excp.NumAtoms() > 0:
        top_known, pos_known = oeommutils.oemol_to_openmmTop(rec_excp)
        ff_rec = app.ForceField(opt['protein_forcefield'])
        try:
            omm_system = ff_rec.createSystem(top_known, rigidWater=False)
            rec_struc = parmed.openmm.load_topology(top_known, omm_system, xyz=pos_known)
        except Exception:
            oechem.OEThrow.Fatal("Error in the recognised excipient parametrization")

    # Unrecognized FF excipients: flip the mask to select them
    bv.NegateBits()
    pred_unrc = oechem.OEAtomIdxSelected(bv)
    unrc_excp = oechem.OEMol()
    oechem.OESubsetMol(unrc_excp, excipients, pred_unrc)

    # Unrecognized FF excipients coordinates
    oe_coord_dic = unrc_excp.GetCoords()
    unrc_coords = np.ndarray(shape=(unrc_excp.NumAtoms(), 3))
    for at_idx in oe_coord_dic:
        unrc_coords[at_idx] = oe_coord_dic[at_idx]

    # It is important the order used to assemble the structures. In order to
    # avoid mismatch between the coordinates and the structures, it is convenient
    # to use the unrecognized residue order: consecutive residues sharing the
    # same name are collapsed in (name, number of consecutive occurrences) pairs
    unmatched_res_order_count = [
        (res_name, sum(1 for _ in run))
        for res_name, run in groupby(unmatched_res_order)
    ]

    # Merge all the unrecognized Parmed structure
    unrc_struc = parmed.Structure()

    for res_name, nums in unmatched_res_order_count:
        unrc_struc = unrc_struc + nums*unrc_excipient_structures[res_name]

    # Set the unrecognized coordinates
    unrc_struc.coordinates = unrc_coords

    # Set the parmed excipient structure merging
    # the unrecognized and recognized parmed
    # structures together
    if rec_excp.NumAtoms() > 0:
        excipients_structure = unrc_struc + rec_struc
    else:
        excipients_structure = unrc_struc

    return excipients_structure
Exemple #22
0
def mutate_structure(target_structure: oechem.OEGraphMol,
                     template_sequence: str) -> oechem.OEGraphMol:
    """
    Edit a protein structure until its sequence agrees with a template.

    The structure is modified in place, one residue per pass: the current
    sequence (from ``get_sequence``) is globally aligned to the template,
    the first disagreement is fixed by deleting or mutating one residue,
    and the alignment is recomputed. Residues reported as "X" in the target
    sequence are never deleted or mutated.

    Parameters
    ----------
    target_structure: oechem.OEGraphMol
        An OpenEye molecule holding the protein structure to edit.
    template_sequence: str
        One letter amino acid sequence the structure should end up with.

    Returns
    -------
    mutated_structure: oechem.OEGraphMol
        The same molecule object that was passed in, after all edits, side
        chain building, hydrogen placement and residue perception.
    """
    from Bio import pairwise2

    # The hierarchy view is rebuilt after every single change, see
    # https://docs.eyesopen.com/toolkits/python/oechemtk/biopolymers.html#a-hierarchy-view
    while True:
        # Global alignment, gap open penalty -10, gap extension penalty 0;
        # only the best scoring alignment is used.
        target_sequence = get_sequence(target_structure)
        top_alignment = pairwise2.align.globalxs(
            template_sequence, target_sequence, -10, 0)[0]
        aligned_template, aligned_target = top_alignment[:2]
        logging.debug(f"Template sequence:\n{template_sequence}")
        logging.debug(f"Target sequence:\n{target_sequence}")
        # Keep the view bound to a name for as long as its iterator is used.
        hierview = oechem.OEHierView(target_structure)
        residue_cursor = hierview.GetResidues()

        # Walk both aligned sequences column by column, advancing the
        # residue iterator whenever the target column holds a residue.
        for wanted, present in zip(aligned_template, aligned_target):
            if wanted == "-":
                # Target residue without a counterpart in the template.
                hier_residue = residue_cursor.next()
                if present == "X":
                    continue
                for atom in hier_residue.GetAtoms():
                    target_structure.DeleteAtom(atom)
                # Structure changed: start over with a fresh alignment.
                break

            if present == "-":
                # Template residue missing in the target; nothing to edit.
                continue

            hier_residue = residue_cursor.next()
            if present in ("X", wanted):
                continue

            # Mismatch: mutate the residue to the template amino acid.
            residue_index = oechem.OEGetResidueIndexFromCode(wanted)
            oespruce.OEMutateResidue(
                target_structure,
                hier_residue.GetOEResidue(),
                oechem.OEGetResidueName(residue_index))
            # Structure changed: start over with a fresh alignment.
            break
        else:
            # Every column was visited without an edit, so we are done.
            break

    # OEMutateResidue doesn't build sidechains and doesn't add hydrogens automatically
    oespruce.OEBuildSidechains(target_structure)
    oechem.OEPlaceHydrogens(target_structure)
    # Refresh the residue information after all the edits.
    oechem.OEPerceiveResidues(target_structure, oechem.OEPreserveResInfo_All)

    return target_structure
def dock_molecule_to_receptor(molecule, receptor_filename, covalent=False):
    """
    Dock a molecule into a receptor and return the best-scoring pose.

    Reasonable tautomers of the molecule are enumerated, conformers are
    expanded with Omega, and every tautomer is docked with the Hybrid2
    method at high search resolution. The pose with the lowest ``score``
    (module-level key function) over all tautomers is returned.

    Parameters
    ----------
    molecule : oechem.OEMol
        The molecule to dock
    receptor_filename : str
        Receptor to dock to
    covalent : bool, optional, default=False
        If True and ``find_warheads`` reports at least one warhead, a custom
        receptor constraint is added that asks the warhead SMARTS to lie
        within a 4.0 Angstrom sphere centered on the SG atom of CYS 145 in
        chain A.

    Returns
    -------
    docked_molecule : OpenEye molecule or None
        Copy of the best docked pose, annotated with the docking score and
        the ``docked_fragment`` SD tag.
        None is returned if no viable docked pose found

    Raises
    ------
    Exception
        If the receptor has no bound ligand, or if ``covalent`` handling is
        active and the CYS145 SG atom cannot be found.
    RuntimeError
        If the docking engine cannot be initialized with the receptor.

    Notes
    -----
    An unreadable receptor file is reported through ``oechem.OEThrow.Fatal``.
    """
    from openeye import oechem, oedocking, oeomega, oequacpac

    # Extract the fragment name for the receptor
    fragment = extract_fragment_from_filename(receptor_filename)

    # Read the receptor
    receptor = oechem.OEGraphMol()
    if not oedocking.OEReadReceptorFile(receptor, receptor_filename):
        oechem.OEThrow.Fatal("Unable to read receptor")

    if not oedocking.OEReceptorHasBoundLigand(receptor):
        raise Exception("Receptor does not have bound ligand")

    # Add covalent restraint if specified
    warheads_found = find_warheads(molecule)
    if covalent and warheads_found:
        # Initialize covalent constraints
        custom_constraints = oedocking.OEReceptorGetCustomConstraints(receptor)

        # Find CYS145 SG atom
        hier_view = oechem.OEHierView(receptor)
        cys145 = hier_view.GetResidue("A", "CYS", 145)
        protein_heavy_atom = None
        for atom in cys145.GetAtoms():
            if atom.GetName().strip() == 'SG':
                protein_heavy_atom = atom
                break
        if protein_heavy_atom is None:
            raise Exception('Could not find CYS145 SG')

        # Add the constraint: one feature holding every warhead SMARTS,
        # restricted to a sphere around the sulfur atom.
        feature = custom_constraints.AddFeature()
        feature.SetFeatureName("CYS145 proximity")
        for warhead_type in warheads_found:
            smarts = covalent_warhead_smarts[warhead_type]
            print(f'Adding constraint for SMARTS pattern {smarts}')
            feature.AddSmarts(smarts)
        sphere_radius = 4.0  # Angstroms
        sphere_center = oechem.OEFloatArray(3)
        receptor.GetCoords(protein_heavy_atom, sphere_center)
        sphere = feature.AddSphere()
        sphere.SetRad(sphere_radius)
        sphere.SetCenter(sphere_center[0], sphere_center[1], sphere_center[2])
        oedocking.OEReceptorSetCustomConstraints(receptor, custom_constraints)

    # Initialize the docking engine only now, so that any custom constraint
    # written to the receptor above is already in place when the receptor is
    # handed over. Initializing earlier risks docking without the restraint.
    dock_method = oedocking.OEDockMethod_Hybrid2
    dock_resolution = oedocking.OESearchResolution_High
    dock = oedocking.OEDock(dock_method, dock_resolution)
    if not dock.Initialize(receptor):
        raise RuntimeError("Unable to initialize docking with receptor")

    # Enumerate tautomers
    tautomer_options = oequacpac.OETautomerOptions()
    tautomer_options.SetMaxTautomersGenerated(4096)
    tautomer_options.SetMaxTautomersToReturn(16)
    tautomer_options.SetCarbonHybridization(True)
    tautomer_options.SetMaxZoneSize(50)
    tautomer_options.SetApplyWarts(True)
    pKa_norm = True
    tautomers = [
        oechem.OEMol(tautomer)
        for tautomer in oequacpac.OEGetReasonableTautomers(
            molecule, tautomer_options, pKa_norm)
    ]

    # Set up Omega
    omega_options = oeomega.OEOmegaOptions()
    omega_options.SetMaxSearchTime(60.0)  # time out
    omega = oeomega.OEOmega(omega_options)
    omega.SetStrictStereo(False)  # enumerate stereochemistry if uncertain

    # Dock tautomers
    sd_tag = oedocking.OEDockMethodGetName(dock_method)
    docked_molecules = []
    for tautomer in tautomers:
        # Expand conformers
        omega.Build(tautomer)

        # Dock molecule; tautomers that fail to dock are skipped
        docked_pose = oechem.OEGraphMol()
        return_code = dock.DockMultiConformerMolecule(docked_pose, tautomer)
        if return_code != oedocking.OEDockingReturnCode_Success:
            continue

        # Store docking data
        oedocking.OESetSDScore(docked_pose, dock, sd_tag)
        oechem.OESetSDData(docked_pose, "docked_fragment", fragment)
        dock.AnnotatePose(docked_pose)

        docked_molecules.append(docked_pose.CreateCopy())

    if not docked_molecules:
        return None

    # Select the best-ranked molecule and pose
    # Note that this ignores protonation state and tautomer penalties
    return min(docked_molecules, key=score)
    def makeComplex(self):
        '''
        Read the PDB file at self.pdbPath and split it into per-residue
        molecules.

        Every molecule in the file has its residues perceived and is then
        walked residue by residue through an OEHierView:

        * a residue whose name is in self.correctResNames is converted with
          self.createMolecule, passed through self.fixAcid (ASP / GLU only)
          and self.fixTerminii, and stored in self.residues under the key
          "<residue number>_<residue name>" as [molecule, ring data, True];
        * any other residue is treated as the ligand and stored in
          self.ligand as [molecule, ring data]. This is a plain assignment,
          so when several such residues exist only the last one is kept.

        In the code visible here nothing happens when the file cannot be
        opened.

        NOTE(review): the previous docstring described a filter on
        self.consensusResList and an atom-by-atom / bond-by-bond copy with
        special care for the N and C terminal atoms of neighbouring
        residues. Neither is visible in this method; presumably that logic
        lives in self.createMolecule -- confirm.
        '''

        # Load protein, extract residues
        ifsProt = oechem.oemolistream()
        ifsProt.SetFormat(oechem.OEFormat_PDB)

        if ifsProt.open(self.pdbPath):
            for prot in ifsProt.GetOEGraphMols():

                # Residue information must be in place before the
                # hierarchy view is built on top of it
                oechem.OEPerceiveResidues(prot, oechem.OEPreserveResInfo_All)
                hierView = oechem.OEHierView(prot)

                # Looping over the protein's residues
                for mol in hierView.GetResidues():

                    # Check if residue or ligand: names listed in
                    # self.correctResNames are residues, the rest is ligand
                    molName = mol.GetResidueName()
                    if molName in self.correctResNames:

                        # Fix residue number: left-pad one and two character
                        # numbers with zeros to three characters
                        resNumber = str(mol.GetResidueNumber())
                        if len(resNumber) == 1:
                            resNumber = "00" + resNumber
                        elif len(resNumber) == 2:
                            resNumber = "0" + resNumber

                        # HIE and HSE are stored under the name HIS
                        if molName in ("HIE", "HSE"):
                            molName = "HIS"
                        resTitle = resNumber + "_" + molName

                        # Setup the information to store for this residue
                        residue = self.createMolecule(mol, resTitle)

                        #--------------------------------------
                        # Set of fixes specific for residues
                        #--------------------------------------
                        # Fix aspartic acid and glutamic acid charge
                        if "_ASP" in resTitle or "_GLU" in resTitle:
                            self.fixAcid(residue)

                        # Fix N-term and C-term termini
                        self.fixTerminii(residue)

                        resRings = RingAnalysis(residue).ringsData
                        # Store the information; the meaning of the trailing
                        # True flag is not visible in this method
                        self.residues[resTitle] = [residue, resRings, True]
                    else:
                        # What to store for the ligand
                        ligand = self.createMolecule(mol, molName)
                        ligandRings = RingAnalysis(ligand).ringsData
                        # Store it (overwrites any ligand stored earlier)
                        self.ligand = [ligand, ligandRings]