Ejemplo n.º 1
0
def test_pi_stacking():
    """Tally strict pi-stacking hits between ``rec`` and every ligand.

    ``pi_stacking`` is called with ``cutoff=8``; element 2 of its result is
    summed for the parallel tally and element 3 for the perpendicular
    tally.  Both tallies are printed, but only the perpendicular one is
    compared against a reference list.
    """
    parallel_totals = []
    for mol in mols:
        stacking = pi_stacking(rec, mol, cutoff=8)
        parallel_totals.append(stacking[2].sum())
    print(parallel_totals)
    # No reference values are asserted for the parallel tally.

    perpendicular_totals = []
    for mol in mols:
        stacking = pi_stacking(rec, mol, cutoff=8)
        perpendicular_totals.append(stacking[3].sum())
    print(perpendicular_totals)
    assert_array_equal(perpendicular_totals, [])
Ejemplo n.º 2
0
def test_pi_stacking():
    """Print per-ligand pi-stacking counts and check the perpendicular ones.

    For each molecule in ``mols`` the receptor ``rec`` is passed to
    ``pi_stacking`` with ``cutoff=8``.  Result element 2 is summed for the
    parallel counts and element 3 for the perpendicular counts.
    """
    parallel_field = 2
    perpendicular_field = 3

    def summed(field):
        # One pi_stacking call per ligand, reduced to a single number.
        return [pi_stacking(rec, mol, cutoff=8)[field].sum() for mol in mols]

    pi_parallel_count = summed(parallel_field)
    print(pi_parallel_count)
    # The parallel counts are printed only; no expected values are checked.

    pi_perpendicular_count = summed(perpendicular_field)
    print(pi_perpendicular_count)
    assert_array_equal(pi_perpendicular_count, [])
Ejemplo n.º 3
0
def test_pi_stacking_perpendicular_pdb():
    """Check the perpendicular pi-stacking found in the 4LJH test files.

    The pocket/ligand pair is evaluated in both argument orders with
    ``tolerance=30``.  Each order must report no strict parallel contact,
    exactly one strict perpendicular contact, and a single ``HIS`` residue
    on the pocket side of the result.
    """
    pocket_path = os.path.join(test_data_dir, 'data/pdb/4ljh_pocket.pdb')
    ligand_path = os.path.join(test_data_dir, 'data/pdb/4ljh_ligand.sdf')

    pocket = next(oddt.toolkit.readfile('pdb', pocket_path))
    pocket.protein = True
    ligand = next(oddt.toolkit.readfile('sdf', ligand_path))

    # Pocket first: its rings are the first element of the result.
    pocket_rings, _, parallel, perpendicular = pi_stacking(
        pocket, ligand, tolerance=30)
    assert parallel.sum() == 0
    assert perpendicular.sum() == 1
    assert pocket_rings['resname'].tolist() == ['HIS']

    # Ligand first: the pocket rings move to the second element.
    _, pocket_rings, parallel, perpendicular = pi_stacking(
        ligand, pocket, tolerance=30)
    assert parallel.sum() == 0
    assert perpendicular.sum() == 1
    assert pocket_rings['resname'].tolist() == ['HIS']
Ejemplo n.º 4
0
    def build(self, ligands, protein=None):
        """Build BINANA descriptors for each ligand against a protein.

        For every ligand one feature vector is assembled, in this order:
        Vina terms, close contacts (<4A), electrostatics (<4A), ligand atom
        type counts, close contacts (<2.5A), strict H-bonds (12 values),
        hydrophobic contacts (7), parallel pi-stacking (3), pi-cation (6),
        T-shaped pi-stacking (3), active-site flexibility (7), salt
        bridges (4) and the number of rotatable bonds (1).

        Parameters
        ----------
            ligands: array-like
                An array or generator of oddt.toolkit.Molecule objects for
                which the descriptor is computed

            protein: oddt.toolkit.Molecule object (default=None)
                Protein object to be used while generating descriptors.
                If none, then the default protein (from constructor) is used.
                Otherwise, protein becomes new global and default protein.

        Returns
        -------
            descs: numpy array, shape=[n_samples, 351]
                An array of binana descriptors, aligned with input ligands.
                When ``ligands`` yields nothing, an empty float array of
                shape (0, 0) is returned instead of raising.
        """
        if protein:
            self.set_protein(protein)
        else:
            protein = self.protein
        protein_dict = protein.atom_dict

        # One float row per ligand; stacked once at the end instead of
        # re-allocating the whole matrix on every iteration.
        rows = []
        for mol in ligands:
            mol_dict = mol.atom_dict
            vec = tuple()
            # Vina
            # TODO: Asynchronous output from vina, push command to score and retrieve at the end?
            # TODO: Check if ligand has vina scores
            vec += tuple(self.vina.build(mol).flatten())

            # Close Contacts (<4A)
            vec += tuple(self.cc_4.build(mol).flatten())

            # Electrostatics (<4A)
            ele_rec_types, ele_lig_types = zip(*self.ele_types)
            ele_mol_atoms = atoms_by_type(mol_dict, ele_lig_types,
                                          'atom_types_ad4')
            ele_rec_atoms = atoms_by_type(protein_dict, ele_rec_types,
                                          'atom_types_ad4')
            ele = tuple()
            for r_t, m_t in self.ele_types:
                mol_ele_dict, rec_ele_dict = close_contacts(
                    ele_mol_atoms[m_t], ele_rec_atoms[r_t], 4)
                if len(mol_ele_dict) and len(rec_ele_dict):
                    # NOTE(review): the square root is applied element-wise
                    # before the sum over the last axis, so the divisor is
                    # the sum of absolute coordinate differences rather
                    # than a Euclidean distance. Left unchanged so that
                    # descriptor values stay the same - confirm intent.
                    energy = (mol_ele_dict['charge'] * rec_ele_dict['charge'] /
                              np.sqrt((mol_ele_dict['coords'] -
                                       rec_ele_dict['coords'])**2).sum(axis=-1) *
                              138.94238460104697e4).sum()  # convert to J/mol
                    ele += (energy,)
                else:
                    ele += (0,)
            # nan_to_num replaces NaN/inf entries with finite numbers.
            vec += tuple(np.nan_to_num(ele))

            # Ligand Atom Types
            atoms = atoms_by_type(mol_dict, self.ligand_atom_types,
                                  'atom_types_ad4')
            vec += tuple([len(atoms[t]) for t in self.ligand_atom_types])

            # Close Contacts (<2.5A)
            vec += tuple(self.cc_25.build(mol).flatten())

            # H-Bonds (<4A)
            hbond_mol, hbond_rec, strict = hbonds(mol, protein, 4)
            # Retain only strict hbonds
            hbond_mol = hbond_mol[strict]
            hbond_rec = hbond_rec[strict]
            backbone = hbond_rec['isbackbone']
            alpha = hbond_rec['isalpha']
            beta = hbond_rec['isbeta']
            other = ~alpha & ~beta
            donor_mol = hbond_mol['isdonor']
            donor_rec = hbond_rec['isdonor']
            # Donor side (ligand, receptor) x backbone/side chain x
            # alpha/beta/other -> 12 counts.
            hbond_vec = (
                (donor_mol & backbone & alpha).sum(),
                (donor_mol & backbone & beta).sum(),
                (donor_mol & backbone & other).sum(),
                (donor_mol & ~backbone & alpha).sum(),
                (donor_mol & ~backbone & beta).sum(),
                (donor_mol & ~backbone & other).sum(),
                (donor_rec & backbone & alpha).sum(),
                (donor_rec & backbone & beta).sum(),
                (donor_rec & backbone & other).sum(),
                (donor_rec & ~backbone & alpha).sum(),
                (donor_rec & ~backbone & beta).sum(),
                (donor_rec & ~backbone & other).sum(),
            )
            vec += tuple(hbond_vec)

            # Hydrophobic contacts (<4A)
            hydrophobic = hydrophobic_contacts(mol, protein, 4)[1]
            backbone = hydrophobic['isbackbone']
            alpha = hydrophobic['isalpha']
            beta = hydrophobic['isbeta']
            other = ~alpha & ~beta
            hyd_vec = ((backbone & alpha).sum(), (backbone & beta).sum(),
                       (backbone & other).sum(), (~backbone & alpha).sum(),
                       (~backbone & beta).sum(), (~backbone & other).sum(),
                       len(hydrophobic))
            vec += tuple(hyd_vec)

            # Pi-stacking (<7.5A)
            _, pi_rec, pi_parallel, pi_tshaped = pi_stacking(
                mol, protein, 7.5)
            alpha = pi_rec['isalpha'] & pi_parallel
            beta = pi_rec['isbeta'] & pi_parallel
            other = ~alpha & ~beta & pi_parallel
            pi_vec = (alpha.sum(), beta.sum(), other.sum())
            vec += tuple(pi_vec)

            # T-shaped Pi-Pi interaction; computed here while pi_rec still
            # refers to the stacking result, appended after pi-cation.
            alpha = pi_rec['isalpha'] & pi_tshaped
            beta = pi_rec['isbeta'] & pi_tshaped
            other = ~alpha & ~beta & pi_tshaped
            pi_t_vec = (alpha.sum(), beta.sum(), other.sum())

            # Pi-cation (<6A): receptor ring with ligand cation ...
            pi_rec, _, strict = pi_cation(protein, mol, 6)
            alpha = pi_rec['isalpha'] & strict
            beta = pi_rec['isbeta'] & strict
            other = ~alpha & ~beta & strict
            pi_cat_vec = (alpha.sum(), beta.sum(), other.sum())

            # ... then ligand ring with receptor cation.
            _, cat_rec, strict = pi_cation(mol, protein, 6)
            alpha = cat_rec['isalpha'] & strict
            beta = cat_rec['isbeta'] & strict
            other = ~alpha & ~beta & strict
            pi_cat_vec += (alpha.sum(), beta.sum(), other.sum())

            vec += tuple(pi_cat_vec)

            # T-shape (perpendicular Pi's) (<7.5A)
            vec += tuple(pi_t_vec)

            # Active site flexibility (<4A), atoms with atomicnum 1 excluded
            active_site = close_contacts(
                mol_dict[mol_dict['atomicnum'] != 1],
                protein_dict[protein_dict['atomicnum'] != 1],
                cutoff=4)[1]
            backbone = active_site['isbackbone']
            alpha = active_site['isalpha']
            beta = active_site['isbeta']
            other = ~alpha & ~beta
            as_flex = ((backbone & alpha).sum(), (backbone & beta).sum(),
                       (backbone & other).sum(), (~backbone & alpha).sum(),
                       (~backbone & beta).sum(), (~backbone & other).sum(),
                       len(active_site))
            vec += tuple(as_flex)

            # Salt bridges (<5.5)
            salt_bridge_dict = salt_bridges(mol, protein, 5.5)[1]
            vec += (salt_bridge_dict['isalpha'].sum(),
                    salt_bridge_dict['isbeta'].sum(),
                    (~salt_bridge_dict['isalpha']
                     & ~salt_bridge_dict['isbeta']).sum(),
                    len(salt_bridge_dict))

            # Rotatable bonds
            vec += (mol.num_rotors,)

            rows.append(np.array(vec, dtype=float))

        if not rows:
            # No ligands: the feature width is unknown without computing a
            # vector, so return an empty 2-D array rather than failing.
            return np.zeros((0, 0), dtype=float)
        return np.vstack(rows)
Ejemplo n.º 5
0
def InteractionFingerprint(ligand, protein, strict=True):
    """Per-residue interaction fingerprint of a ligand-protein complex.

    Every distinct residue id found in ``protein.atom_dict['resid']`` gets
    one row of eight columns, one column per interaction type:

    - (Column 0) hydrophobic contacts
    - (Column 1) aromatic face to face
    - (Column 2) aromatic edge to face
    - (Column 3) hydrogen bond (protein as hydrogen bond donor)
    - (Column 4) hydrogen bond (protein as hydrogen bond acceptor)
    - (Column 5) salt bridges (protein positively charged)
    - (Column 6) salt bridges (protein negatively charged)
    - (Column 7) salt bridges (ionic bond with metal ion)

    A cell is incremented by one for every detected contact that maps to
    its residue, and the table is stored as ``uint8``.

    Parameters
    ----------
    ligand, protein : oddt.toolkit.Molecule object
        Molecules, which are analysed in order to find interactions.

    strict : bool (default = True)
        If False, the 'strict' masks returned for hydrogen bonds and metal
        contacts are ignored, so contacts are counted whether or not they
        are flagged as strict.

    Returns
    -------
    InteractionFingerprint : numpy array
        Flattened table (size = number of residues * 8 interaction types)

    """
    resids = np.unique(protein.atom_dict['resid'])
    IFP = np.zeros((len(resids), 8), dtype=np.uint8)

    def _tally(residue_ids, column):
        # Translate residue ids to row numbers and bump the given column
        # once per occurrence (np.add.at accumulates repeated indices).
        rows = np.searchsorted(resids, np.sort(residue_ids)[::-1])
        np.add.at(IFP, (rows, column), 1)

    # Column 0: hydrophobic contacts
    hydrophobic_resids = hydrophobic_contacts(protein, ligand)[0]['resid']
    _tally(hydrophobic_resids, 0)

    # Column 1: aromatic face to face, Column 2: aromatic edge to face
    rings, _, strict_parallel, strict_perpendicular = pi_stacking(
        protein, ligand)
    _tally(rings[strict_parallel]['resid'], 1)
    _tally(rings[strict_perpendicular]['resid'], 2)

    # Column 3: h-bonds with the protein as donor
    _, donors, strict0 = hbond_acceptor_donor(ligand, protein)
    donor_mask = None if strict is False else strict0
    _tally(donors[donor_mask]['resid'], 3)

    # Column 4: h-bonds with the protein as acceptor
    acceptors, _, strict1 = hbond_acceptor_donor(protein, ligand)
    acceptor_mask = None if strict is False else strict1
    _tally(acceptors[acceptor_mask]['resid'], 4)

    # Column 5: salt bridges, protein positively charged
    plus, _ = salt_bridge_plus_minus(protein, ligand)
    _tally(plus['resid'], 5)

    # Column 6: salt bridges, protein negatively charged
    _, minus = salt_bridge_plus_minus(ligand, protein)
    _tally(minus['resid'], 6)

    # Column 7: ionic bond with a metal ion
    _, metal, strict2 = acceptor_metal(protein, ligand)
    metal_mask = None if strict is False else strict2
    _tally(metal[metal_mask]['resid'], 7)

    return IFP.flatten()
Ejemplo n.º 6
0
def SimpleInteractionFingerprint(ligand, protein, strict=True):
    """Based on http://dx.doi.org/10.1016/j.csbj.2014.05.004.
    Every IFP consists of 8 columns per amino acid type (one row = one
    amino acid type) and presents eight types of interaction:

    - (Column 0) hydrophobic contacts
    - (Column 1) aromatic face to face
    - (Column 2) aromatic edge to face
    - (Column 3) hydrogen bond (protein as hydrogen bond donor)
    - (Column 4) hydrogen bond (protein as hydrogen bond acceptor)
    - (Column 5) salt bridges (protein positively charged)
    - (Column 6) salt bridges (protein negatively charged)
    - (Column 7) salt bridges (ionic bond with metal ion)

    Rows follow this (sorted) pattern: '', 'ALA', 'ARG', 'ASN', 'ASP',
    'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE',
    'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL'. The '' means cofactor: every
    residue name that is not one of the listed amino acids is counted in
    row 0. The index of an amino acid in the pattern corresponds to its
    row in the table.

    Parameters
    ----------
    ligand, protein : oddt.toolkit.Molecule object
        Molecules, which are analysed in order to find interactions.

    strict : bool (default = True)
        If False, the 'strict' masks returned for hydrogen bonds and metal
        contacts are ignored, so contacts are counted whether or not they
        are flagged as strict.

    Returns
    -------
    InteractionFingerprint : numpy array
        Vector of calculated IFP (size = 168)

    """

    # Must stay sorted: rows are located with np.searchsorted.
    amino_acids = np.array(['', 'ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU',
                            'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE',
                            'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL'],
                           dtype='<U3')

    IFP = np.zeros((len(amino_acids), 8), dtype=np.uint8)

    # hydrophobic (Column = 0)
    hydrophobic = hydrophobic_contacts(protein, ligand)[0]['resname']
    hydrophobic[~np.in1d(hydrophobic, amino_acids)] = ''
    np.add.at(IFP, (np.searchsorted(amino_acids,
                                    np.sort(hydrophobic)[::-1]), 0), 1)

    # aromatic face to face (Column = 1), aromatic edge to face (Column = 2)
    rings, _, strict_parallel, strict_perpendicular = pi_stacking(
        protein, ligand)
    # Mask indexing returns a copy, so the selected names are held in a
    # local variable; blanking `rings[mask]['resname'][...]` directly would
    # modify a temporary and leave unknown residue names in place.
    parallel_names = rings[strict_parallel]['resname']
    parallel_names[~np.in1d(parallel_names, amino_acids)] = ''
    np.add.at(IFP, (np.searchsorted(
        amino_acids, np.sort(parallel_names)[::-1]), 1), 1)
    perpendicular_names = rings[strict_perpendicular]['resname']
    perpendicular_names[~np.in1d(perpendicular_names, amino_acids)] = ''
    np.add.at(IFP, (np.searchsorted(
        amino_acids, np.sort(perpendicular_names)[::-1]), 2), 1)

    # hbonds donated by the protein (Column = 3)
    _, donors, strict0 = hbond_acceptor_donor(ligand, protein)
    donors['resname'][~np.in1d(donors['resname'], amino_acids)] = ''
    if strict is False:
        strict0 = None
    np.add.at(IFP, (np.searchsorted(
        amino_acids, np.sort(donors[strict0]['resname'])[::-1]), 3), 1)

    # hbonds donated by the ligand (Column = 4)
    acceptors, _, strict1 = hbond_acceptor_donor(protein, ligand)
    acceptors['resname'][~np.in1d(acceptors['resname'], amino_acids)] = ''
    if strict is False:
        strict1 = None
    np.add.at(IFP, (np.searchsorted(
        amino_acids, np.sort(acceptors[strict1]['resname'])[::-1]), 4), 1)

    # ionic bond with protein cation (Column = 5)
    plus, _ = salt_bridge_plus_minus(protein, ligand)
    plus['resname'][~np.in1d(plus['resname'], amino_acids)] = ''
    np.add.at(IFP, (np.searchsorted(amino_acids,
                                    np.sort(plus['resname'])[::-1]), 5), 1)

    # ionic bond with protein anion (Column = 6)
    _, minus = salt_bridge_plus_minus(ligand, protein)
    minus['resname'][~np.in1d(minus['resname'], amino_acids)] = ''
    np.add.at(IFP, (np.searchsorted(amino_acids,
                                    np.sort(minus['resname'])[::-1]), 6), 1)

    # ionic bond with metal ion (Column = 7)
    _, metal, strict2 = acceptor_metal(protein, ligand)
    metal['resname'][~np.in1d(metal['resname'], amino_acids)] = ''
    if strict is False:
        strict2 = None
    np.add.at(IFP, (np.searchsorted(
        amino_acids, np.sort(metal[strict2]['resname'])[::-1]), 7), 1)

    return IFP.flatten()
Ejemplo n.º 7
0
def InteractionFingerprint(ligand, protein, strict=True):
    """Interaction fingerprint accomplished by converting the molecular
    interaction of ligand-protein into an array according to
    the residue of choice and the interaction. For every residue
    (One row = one residue) there are eight columns which represent
    eight types of interaction:

    - (Column 0) hydrophobic contacts
    - (Column 1) aromatic face to face
    - (Column 2) aromatic edge to face
    - (Column 3) hydrogen bond (protein as hydrogen bond donor)
    - (Column 4) hydrogen bond (protein as hydrogen bond acceptor)
    - (Column 5) salt bridges (protein positively charged)
    - (Column 6) salt bridges (protein negatively charged)
    - (Column 7) salt bridges (ionic bond with metal ion)

    A cell is incremented by one for every detected contact that maps to
    its residue.

    Parameters
    ----------
    ligand, protein : oddt.toolkit.Molecule object
        Molecules, which are analysed in order to find interactions.

    strict : bool (default = True)
        If False, the 'strict' masks returned for hydrogen bonds and metal
        contacts are ignored, so contacts are counted whether or not they
        are flagged as strict.

    Returns
    -------
    InteractionFingerprint : numpy array
        Vector of calculated IFP (size = no residues * 8 type of interaction)

    """
    resids = np.unique(protein.atom_dict['resid'])
    IFP = np.zeros((len(resids), 8), dtype=np.uint8)

    # The (rows, column) index handed to np.add.at must be a tuple: recent
    # NumPy releases no longer interpret a list as a multidimensional index.

    # hydrophobic contacts (column = 0)
    hydrophobic = hydrophobic_contacts(protein, ligand)[0]['resid']
    np.add.at(IFP, (np.searchsorted(resids, np.sort(hydrophobic)[::-1]), 0), 1)

    # aromatic face to face (Column = 1), aromatic edge to face (Column = 2)
    rings, _, strict_parallel, strict_perpendicular = pi_stacking(
        protein, ligand)
    np.add.at(IFP, (np.searchsorted(
        resids, np.sort(rings[strict_parallel]['resid'])[::-1]), 1), 1)
    np.add.at(IFP, (np.searchsorted(
        resids, np.sort(rings[strict_perpendicular]['resid'])[::-1]), 2), 1)

    # h-bonds, protein as a donor (Column = 3)
    _, donors, strict0 = hbond_acceptor_donor(ligand, protein)
    if strict is False:
        strict0 = None
    np.add.at(IFP, (np.searchsorted(
        resids, np.sort(donors[strict0]['resid'])[::-1]), 3), 1)

    # h-bonds, protein as an acceptor (Column = 4)
    acceptors, _, strict1 = hbond_acceptor_donor(protein, ligand)
    if strict is False:
        strict1 = None
    np.add.at(IFP, (np.searchsorted(
        resids, np.sort(acceptors[strict1]['resid'])[::-1]), 4), 1)

    # salt bridges, protein positively charged (Column = 5)
    plus, _ = salt_bridge_plus_minus(protein, ligand)
    np.add.at(IFP, (np.searchsorted(resids, np.sort(plus['resid'])[::-1]), 5), 1)

    # salt bridges, protein negatively charged (Column = 6)
    _, minus = salt_bridge_plus_minus(ligand, protein)
    np.add.at(IFP, (np.searchsorted(resids, np.sort(minus['resid'])[::-1]), 6), 1)

    # salt bridges, ionic bond with metal ion (Column = 7)
    _, metal, strict2 = acceptor_metal(protein, ligand)
    if strict is False:
        strict2 = None
    np.add.at(IFP, (np.searchsorted(
        resids, np.sort(metal[strict2]['resid'])[::-1]), 7), 1)

    return IFP.flatten()
Ejemplo n.º 8
0
def SimpleInteractionFingerprint(ligand, protein, strict=True):
    """Based on http://dx.doi.org/10.1016/j.csbj.2014.05.004.
    Every IFP consists of 8 columns per amino acid type (one row = one
    amino acid type) and presents eight types of interaction:

    - (Column 0) hydrophobic contacts
    - (Column 1) aromatic face to face
    - (Column 2) aromatic edge to face
    - (Column 3) hydrogen bond (protein as hydrogen bond donor)
    - (Column 4) hydrogen bond (protein as hydrogen bond acceptor)
    - (Column 5) salt bridges (protein positively charged)
    - (Column 6) salt bridges (protein negatively charged)
    - (Column 7) salt bridges (ionic bond with metal ion)

    Rows follow this (sorted) pattern: '', 'ALA', 'ARG', 'ASN', 'ASP',
    'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE',
    'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL'. The '' means cofactor: every
    residue name that is not one of the listed amino acids is counted in
    row 0. The index of an amino acid in the pattern corresponds to its
    row in the table.

    Parameters
    ----------
    ligand, protein : oddt.toolkit.Molecule object
        Molecules, which are analysed in order to find interactions.

    strict : bool (default = True)
        If False, the 'strict' masks returned for hydrogen bonds and metal
        contacts are ignored, so contacts are counted whether or not they
        are flagged as strict.

    Returns
    -------
    InteractionFingerprint : numpy array
        Vector of calculated IFP (size = 168)

    """

    # Must stay sorted: rows are located with np.searchsorted.
    amino_acids = np.array(['', 'ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU',
                            'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE',
                            'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL'],
                           dtype='<U3')

    IFP = np.zeros((len(amino_acids), 8), dtype=np.uint8)

    # The (rows, column) index handed to np.add.at must be a tuple: recent
    # NumPy releases no longer interpret a list as a multidimensional index.

    # hydrophobic (Column = 0)
    hydrophobic = hydrophobic_contacts(protein, ligand)[0]['resname']
    hydrophobic[~np.in1d(hydrophobic, amino_acids)] = ''
    np.add.at(IFP, (np.searchsorted(amino_acids,
                                    np.sort(hydrophobic)[::-1]), 0), 1)

    # aromatic face to face (Column = 1), aromatic edge to face (Column = 2)
    rings, _, strict_parallel, strict_perpendicular = pi_stacking(
        protein, ligand)
    # Mask indexing returns a copy, so the selected names are held in a
    # local variable; blanking `rings[mask]['resname'][...]` directly would
    # modify a temporary and leave unknown residue names in place.
    parallel_names = rings[strict_parallel]['resname']
    parallel_names[~np.in1d(parallel_names, amino_acids)] = ''
    np.add.at(IFP, (np.searchsorted(
        amino_acids, np.sort(parallel_names)[::-1]), 1), 1)
    perpendicular_names = rings[strict_perpendicular]['resname']
    perpendicular_names[~np.in1d(perpendicular_names, amino_acids)] = ''
    np.add.at(IFP, (np.searchsorted(
        amino_acids, np.sort(perpendicular_names)[::-1]), 2), 1)

    # hbonds donated by the protein (Column = 3)
    _, donors, strict0 = hbond_acceptor_donor(ligand, protein)
    donors['resname'][~np.in1d(donors['resname'], amino_acids)] = ''
    if strict is False:
        strict0 = None
    np.add.at(IFP, (np.searchsorted(
        amino_acids, np.sort(donors[strict0]['resname'])[::-1]), 3), 1)

    # hbonds donated by the ligand (Column = 4)
    acceptors, _, strict1 = hbond_acceptor_donor(protein, ligand)
    acceptors['resname'][~np.in1d(acceptors['resname'], amino_acids)] = ''
    if strict is False:
        strict1 = None
    np.add.at(IFP, (np.searchsorted(
        amino_acids, np.sort(acceptors[strict1]['resname'])[::-1]), 4), 1)

    # ionic bond with protein cation (Column = 5)
    plus, _ = salt_bridge_plus_minus(protein, ligand)
    plus['resname'][~np.in1d(plus['resname'], amino_acids)] = ''
    np.add.at(IFP, (np.searchsorted(amino_acids,
                                    np.sort(plus['resname'])[::-1]), 5), 1)

    # ionic bond with protein anion (Column = 6)
    _, minus = salt_bridge_plus_minus(ligand, protein)
    minus['resname'][~np.in1d(minus['resname'], amino_acids)] = ''
    np.add.at(IFP, (np.searchsorted(amino_acids,
                                    np.sort(minus['resname'])[::-1]), 6), 1)

    # ionic bond with metal ion (Column = 7)
    _, metal, strict2 = acceptor_metal(protein, ligand)
    metal['resname'][~np.in1d(metal['resname'], amino_acids)] = ''
    if strict is False:
        strict2 = None
    np.add.at(IFP, (np.searchsorted(
        amino_acids, np.sort(metal[strict2]['resname'])[::-1]), 7), 1)

    return IFP.flatten()
Ejemplo n.º 9
0
Archivo: binana.py Proyecto: DrewG/oddt
 def build(self, ligands, protein=None):
     """ Descriptor building method

     For every ligand one feature vector is assembled, in this order:
     Vina terms (6), close contacts (<4A), electrostatics (<4A), ligand
     atom type counts, close contacts (<2.5A), strict H-bonds (12),
     hydrophobic contacts (7), parallel pi-stacking (3), pi-cation (6),
     T-shaped pi-stacking (3), active-site flexibility (7), salt
     bridges (4) and the number of rotatable bonds (1).

     Parameters
     ----------
         ligands: array-like
             An array or generator of oddt.toolkit.Molecule objects for
             which the descriptor is computed

         protein: oddt.toolkit.Molecule object (default=None)
             Protein object to be used while generating descriptors.
             If none, then the default protein (from constructor) is used.
             Otherwise, protein becomes new global and default protein.

     Returns
     -------
         descs: numpy array, shape=[n_samples, 351]
             An array of binana descriptors, aligned with input ligands.
             When ``ligands`` yields nothing, an empty float array of
             shape (0, 0) is returned instead of raising.
     """
     if protein:
         self.set_protein(protein)
     else:
         protein = self.protein
     protein_dict = protein.atom_dict

     # Literal lookup tables do not depend on the ligand, so they are
     # defined once before the loop.
     vina_scores = ['vina_affinity', 'vina_gauss1', 'vina_gauss2',
                    'vina_repulsion', 'vina_hydrophobic', 'vina_hydrogen']
     # (receptor atom type, ligand atom type) pairs for electrostatics
     ele_types = (('A', 'A'), ('A', 'C'), ('A', 'CL'), ('A', 'F'), ('A', 'FE'), ('A', 'HD'), ('A', 'MG'), ('A', 'MN'), ('A', 'N'), ('A', 'NA'), ('A', 'OA'), ('A', 'SA'), ('A', 'ZN'), ('BR', 'C'), ('BR', 'HD'), ('BR', 'OA'), ('C', 'C'), ('C', 'CL'), ('C', 'F'), ('C', 'HD'), ('C', 'MG'), ('C', 'MN'), ('C', 'N'), ('C', 'NA'), ('C', 'OA'), ('C', 'SA'), ('C', 'ZN'), ('CL', 'FE'), ('CL', 'HD'), ('CL', 'MG'), ('CL', 'N'), ('CL', 'OA'), ('CL', 'ZN'), ('F', 'HD'), ('F', 'N'), ('F', 'OA'), ('F', 'SA'), ('F', 'ZN'), ('FE', 'HD'), ('FE', 'N'), ('FE', 'OA'), ('HD', 'HD'), ('HD', 'I'), ('HD', 'MG'), ('HD', 'MN'), ('HD', 'N'), ('HD', 'NA'), ('HD', 'OA'), ('HD', 'P'), ('HD', 'S'), ('HD', 'SA'), ('HD', 'ZN'), ('MG', 'NA'), ('MG', 'OA'), ('MN', 'N'), ('MN', 'OA'), ('N', 'N'), ('N', 'NA'), ('N', 'OA'), ('N', 'SA'), ('N', 'ZN'), ('NA', 'OA'), ('NA', 'SA'), ('NA', 'ZN'), ('OA', 'OA'), ('OA', 'SA'), ('OA', 'ZN'), ('S', 'ZN'), ('SA', 'ZN'), ('A', 'BR'), ('A', 'I'), ('A', 'P'), ('A', 'S'), ('BR', 'N'), ('BR', 'SA'), ('C', 'FE'), ('C', 'I'), ('C', 'P'), ('C', 'S'), ('CL', 'MN'), ('CL', 'NA'), ('CL', 'P'), ('CL', 'S'), ('CL', 'SA'), ('CU', 'HD'), ('CU', 'N'), ('FE', 'NA'), ('FE', 'SA'), ('I', 'N'), ('I', 'OA'), ('MG', 'N'), ('MG', 'P'), ('MG', 'S'), ('MG', 'SA'), ('MN', 'NA'), ('MN', 'P'), ('MN', 'S'), ('MN', 'SA'), ('N', 'P'), ('N', 'S'), ('NA', 'P'), ('NA', 'S'), ('OA', 'P'), ('OA', 'S'), ('P', 'S'), ('P', 'SA'), ('P', 'ZN'), ('S', 'SA'), ('SA', 'SA'))
     ele_rec_types, ele_lig_types = zip(*ele_types)
     ligand_atom_types = ['A', 'BR', 'C', 'CL', 'F', 'HD', 'I', 'N', 'NA', 'OA', 'P', 'S', 'SA']

     # One float row per ligand; stacked once at the end instead of
     # re-allocating the whole matrix on every iteration.
     rows = []
     for mol in ligands:
         mol_dict = mol.atom_dict
         vec = tuple()
         # Vina
         ### TODO: Asynchronous output from vina, push command to score and retrieve at the end?
         ### TODO: Check if ligand has vina scores
         scored_mol = self.vina.score(mol, single=True)[0].data
         vec += tuple(scored_mol[key] for key in vina_scores)

         # Close Contacts (<4A)
         vec += tuple(self.cc_4.build(mol, single=True).flatten())

         # Electrostatics (<4A)
         ele_mol_atoms = atoms_by_type(mol_dict, ele_lig_types, 'atom_types_ad4')
         ele_rec_atoms = atoms_by_type(protein_dict, ele_rec_types, 'atom_types_ad4')
         ele = tuple()
         for r_t, m_t in ele_types:
             mol_ele_dict, rec_ele_dict = interactions.close_contacts(ele_mol_atoms[m_t], ele_rec_atoms[r_t], 4)
             if len(mol_ele_dict) and len(rec_ele_dict):
                 # NOTE(review): the square root is applied element-wise
                 # before the sum over the last axis, so the divisor is the
                 # sum of absolute coordinate differences rather than a
                 # Euclidean distance. Left unchanged so that descriptor
                 # values stay the same - confirm intent.
                 energy = (mol_ele_dict['charge'] * rec_ele_dict['charge'] /
                           np.sqrt((mol_ele_dict['coords'] - rec_ele_dict['coords'])**2).sum(axis=-1) *
                           138.94238460104697e4).sum()  # convert to J/mol
                 ele += (energy,)
             else:
                 ele += (0,)
         vec += tuple(ele)

         # Ligand Atom Types
         atoms = atoms_by_type(mol_dict, ligand_atom_types, 'atom_types_ad4')
         atoms_counts = [len(atoms[t]) for t in ligand_atom_types]
         vec += tuple(atoms_counts)

         # Close Contacts (<2.5A)
         vec += tuple(self.cc_25.build(mol, single=True).flatten())

         # H-Bonds (<4A)
         hbond_mol, hbond_rec, strict = interactions.hbond(mol, protein, 4)
         # Retain only strict hbonds
         hbond_mol = hbond_mol[strict]
         hbond_rec = hbond_rec[strict]
         backbone = hbond_rec['isbackbone']
         alpha = hbond_rec['isalpha']
         beta = hbond_rec['isbeta']
         other = ~alpha & ~beta
         donor_mol = hbond_mol['isdonor']
         donor_rec = hbond_rec['isdonor']
         # Donor side (ligand, receptor) x backbone/side chain x
         # alpha/beta/other -> 12 counts.
         hbond_vec = (
             (donor_mol & backbone & alpha).sum(),
             (donor_mol & backbone & beta).sum(),
             (donor_mol & backbone & other).sum(),
             (donor_mol & ~backbone & alpha).sum(),
             (donor_mol & ~backbone & beta).sum(),
             (donor_mol & ~backbone & other).sum(),
             (donor_rec & backbone & alpha).sum(),
             (donor_rec & backbone & beta).sum(),
             (donor_rec & backbone & other).sum(),
             (donor_rec & ~backbone & alpha).sum(),
             (donor_rec & ~backbone & beta).sum(),
             (donor_rec & ~backbone & other).sum(),
         )
         vec += tuple(hbond_vec)

         # Hydrophobic contacts (<4A)
         hydrophobic = interactions.hydrophobic_contacts(mol, protein, 4)[1]
         backbone = hydrophobic['isbackbone']
         alpha = hydrophobic['isalpha']
         beta = hydrophobic['isbeta']
         other = ~alpha & ~beta
         hyd_vec = ((backbone & alpha).sum(), (backbone & beta).sum(), (backbone & other).sum(),
                    (~backbone & alpha).sum(), (~backbone & beta).sum(), (~backbone & other).sum(), len(hydrophobic))
         vec += tuple(hyd_vec)

         # Pi-stacking (<7.5A)
         _, pi_rec, pi_parallel, pi_tshaped = interactions.pi_stacking(mol, protein, 7.5)
         alpha = pi_rec['isalpha'] & pi_parallel
         beta = pi_rec['isbeta'] & pi_parallel
         other = ~alpha & ~beta & pi_parallel
         pi_vec = (alpha.sum(), beta.sum(), other.sum())
         vec += tuple(pi_vec)

         # count T-shaped Pi-Pi interaction; computed here while pi_rec
         # still refers to the stacking result, appended after pi-cation.
         alpha = pi_rec['isalpha'] & pi_tshaped
         beta = pi_rec['isbeta'] & pi_tshaped
         other = ~alpha & ~beta & pi_tshaped
         pi_t_vec = (alpha.sum(), beta.sum(), other.sum())

         # Pi-cation (<6A): receptor ring with ligand cation ...
         pi_rec, _, strict = interactions.pi_cation(protein, mol, 6)
         alpha = pi_rec['isalpha'] & strict
         beta = pi_rec['isbeta'] & strict
         other = ~alpha & ~beta & strict
         pi_cat_vec = (alpha.sum(), beta.sum(), other.sum())

         # ... then ligand ring with receptor cation.
         _, cat_rec, strict = interactions.pi_cation(mol, protein, 6)
         alpha = cat_rec['isalpha'] & strict
         beta = cat_rec['isbeta'] & strict
         other = ~alpha & ~beta & strict
         pi_cat_vec += (alpha.sum(), beta.sum(), other.sum())

         vec += tuple(pi_cat_vec)

         # T-shape (perpendicular Pi's) (<7.5A)
         vec += tuple(pi_t_vec)

         # Active site flexibility (<4A)
         active_site = interactions.close_contacts(mol_dict, protein_dict, 4)[1]
         backbone = active_site['isbackbone']
         alpha = active_site['isalpha']
         beta = active_site['isbeta']
         other = ~alpha & ~beta
         as_flex = ((backbone & alpha).sum(), (backbone & beta).sum(), (backbone & other).sum(),
                    (~backbone & alpha).sum(), (~backbone & beta).sum(), (~backbone & other).sum(), len(active_site))
         vec += tuple(as_flex)

         # Salt bridges (<5.5)
         salt_bridge_rec = interactions.salt_bridges(mol, protein, 5.5)[1]
         vec += (salt_bridge_rec['isalpha'].sum(), salt_bridge_rec['isbeta'].sum(),
                 (~salt_bridge_rec['isalpha'] & ~salt_bridge_rec['isbeta']).sum(), len(salt_bridge_rec))

         # Rotatable bonds
         vec += (mol.num_rotors,)

         rows.append(np.array(vec, dtype=float))

     if not rows:
         # No ligands: the feature width is unknown without computing a
         # vector, so return an empty 2-D array rather than failing.
         return np.zeros((0, 0), dtype=float)
     return np.vstack(rows)
Ejemplo n.º 10
0
def InteractionCheck(ppath, Listoflig, cur_dir):
    """Write a residue interaction report for every ligand pose of a protein.

    The protein is read once from ``ppath``. For each entry of ``Listoflig``
    a ``<PoseName>_ResidueReport.csv`` file is created (truncating any
    previous one) under ``<cur_dir>/Fingerprint`` with a header line, and
    the result of every ODDT interaction search is handed to
    ``InteractionsFile`` together with that report path.

    Side effects: sets the module-level ``proteinpath`` global and changes
    the process working directory to the directory containing the protein.

    Parameters
    ----------
    ppath : str
        Path to the protein PDB file.
    Listoflig : iterable
        Objects exposing ``PoseNameExt`` (ligand PDB file to read) and
        ``PoseName`` (prefix of the report file name).
    cur_dir : str
        Directory that contains the ``Fingerprint`` output folder.

    Returns
    -------
    None
        If the protein cannot be read, the error text is written to
        ``ErrorLog.txt`` next to the protein file and the function returns
        without processing any ligand.
    """
    global proteinpath
    proteinpath = ppath

    # Resolve the protein directory before chdir so that it stays valid
    # afterwards and a bare file name does not lead to os.chdir('').
    protein_dir = os.path.dirname(os.path.abspath(proteinpath))
    os.chdir(protein_dir)

    try:
        protein = next(oddt.toolkit.readfile('pdb', proteinpath, removeHs=False))
        protein.protein = True
    except Exception as e:
        print("Input structure could not be split into protein and ligand. Please check ligand identifier.")
        # The log belongs in the protein's directory; the file name itself
        # (basename) is not a directory and cannot hold the log.
        with open(os.path.join(protein_dir, 'ErrorLog.txt'), 'w') as error_log:
            error_log.write(str(e))
        # Without a protein there is nothing to analyse.
        return

    for ligand_object in Listoflig:
        ligandname = ligand_object.PoseNameExt

        ResReport = ligand_object.PoseName + "_ResidueReport.csv"
        path = os.path.join(cur_dir, 'Fingerprint', ResReport)

        # Start a fresh report containing only the header line.
        with open(path, 'w') as report:
            report.write("Ligand interactions with protein residues\n")

        # Read in and define the reference ligand
        ligand = next(oddt.toolkit.readfile('pdb', ligandname, removeHs=False))

        # Hydrophobic interactions
        p_hydroph, l_hydroph = interactions.hydrophobic_contacts(protein, ligand)
        InteractionsFile(p_hydroph, l_hydroph, path, 'hydrophobic')

        # h bonds
        p_hbonds, l_hbonds, _ = interactions.hbonds(protein, ligand)
        InteractionsFile(p_hbonds, l_hbonds, path, 'hydrogen bond')

        # halogens
        p_halogen, l_halogen, _ = interactions.halogenbonds(protein, ligand)
        InteractionsFile(p_halogen, l_halogen, path, 'halogen bond')

        # pistacking bonds
        # NOTE(review): element [2] of the pi_stacking result is passed where
        # every other call passes the ligand-side array; element [1] looks
        # like the ligand rings -- confirm against InteractionsFile.
        pi_interactions = interactions.pi_stacking(protein, ligand)
        InteractionsFile(pi_interactions[0], pi_interactions[2], path, 'pi stacking')

        # salt bridges
        p_salt_bridges, l_salt_bridges = interactions.salt_bridges(protein, ligand)
        InteractionsFile(p_salt_bridges, l_salt_bridges, path, 'salt bridge')

        # pi_cation
        p_pi_cation, l_pi_cation, _ = interactions.pi_cation(protein, ligand)
        InteractionsFile(p_pi_cation, l_pi_cation, path, 'pi cation')

        # acceptor_metal bonds
        p_acceptor_metal_a, acceptor_metal_a, _ = interactions.acceptor_metal(protein, ligand)
        InteractionsFile(p_acceptor_metal_a, acceptor_metal_a, path, 'acceptor metal')

        # pi_metal bonds
        p_pi_metal, l_pi_metal, _ = interactions.pi_metal(protein, ligand)
        InteractionsFile(p_pi_metal, l_pi_metal, path, 'pi metal')
Ejemplo n.º 11
0
def test_pi_stacking():
    """Compare ring data and pi_stacking output for 10gs with stored values."""
    complex_dir = os.path.join(test_data_dir, 'data', 'pdbbind', '10gs')
    ligand_path = os.path.join(complex_dir, '10gs_ligand.sdf')
    pocket_path = os.path.join(complex_dir, '10gs_pocket.pdb')

    lig = next(oddt.toolkit.readfile('sdf', ligand_path))
    rec = next(oddt.toolkit.readfile('pdb', pocket_path))
    # The pocket has to be flagged as a protein before it is analysed.
    rec.protein = True

    stacking = pi_stacking(rec, lig, cutoff=7.5, tolerance=60)
    ring, _, strict_parallel, strict_perpendicular = stacking

    # Reference values, listed in sorted order.
    expected_lig_centroids = [
        [5.4701666, 6.1994996, 30.8313350],
        [8.1811666, 2.5846664, 28.4028320],
    ]
    expected_lig_vectors = [
        [-0.474239, 0.898374, 1.326541],
        [0.62094, 1.120537, 1.086084],
    ]
    expected_rec_centroids = [
        [5.6579995, 2.2964999, 23.4626674],
        [7.8634004, 7.7310004, 34.8283996],
        [9.8471670, 8.5676660, 34.9915008],
        [9.9951667, 3.7756664, 32.8191680],
        [10.055333, -1.4720000, 17.2121658],
        [14.519165, 1.8759999, 29.8346652],
        [16.490833, 16.873500, 27.9169998],
        [18.718666, 12.703166, 33.3141670],
        [25.716165, 4.9741668, 31.8198337],
    ]
    expected_rec_vectors = [
        [-1.610038, 0.445293, 0.219816],
        [-1.465347, -0.270806, -0.786749],
        [-1.451653, -0.268732, 0.791577],
        [-1.108574, 1.233418, -0.239182],
        [-0.448415, -0.427071, -1.564852],
        [0.230433, 0.007991, 1.662302],
        [0.475315, -0.971355, -0.778596],
        [0.484955, 1.471549, 0.672478],
        [0.600022, -1.235512, -0.987680],
    ]
    # One row per receptor centroid, one column per ligand centroid.
    expected_distances = [
        [8.3406204, 5.5546951],
        [4.9040379, 8.2385464],
        [6.4863953, 9.0544131],
        [5.5047319, 4.9206809],
        [16.2897951, 12.0498984],
        [10.0782127, 6.5362510],
        [15.6167449, 16.5365460],
        [14.9661240, 15.4124670],
        [20.3071175, 18.0239224],
    ]

    assert_array_almost_equal(
        distance(expected_rec_centroids, expected_lig_centroids),
        expected_distances)

    # Ligand ring_dict content.
    assert len(lig.ring_dict) == 2
    lig_centroids_found = sorted(lig.ring_dict['centroid'].tolist())
    assert_array_almost_equal(lig_centroids_found, expected_lig_centroids,
                              decimal=5)
    lig_vectors_found = sorted(lig.ring_dict['vector'].tolist())
    assert_array_almost_equal(lig_vectors_found, expected_lig_vectors,
                              decimal=5)

    # Receptor ring_dict content.
    assert len(rec.ring_dict) == 9
    rec_centroids_found = sorted(rec.ring_dict['centroid'].tolist())
    assert_array_almost_equal(rec_centroids_found, expected_rec_centroids,
                              decimal=5)
    rec_vectors_found = sorted(rec.ring_dict['vector'].tolist())
    assert_array_almost_equal(rec_vectors_found, expected_rec_vectors,
                              decimal=5)

    # pi_stacking output.
    assert len(ring) == 6
    assert strict_parallel.sum() == 4
    assert strict_perpendicular.sum() == 3

    expected_resnums = [7, 8, 8, 38, 38, 108]
    resids = sorted(ring['resid'])
    # The resid values index rec.res_dict; the residues found that way must
    # carry the same resnum values as the ring records themselves.
    assert_array_equal(rec.res_dict[resids]['resnum'], expected_resnums)
    assert_array_equal(sorted(ring['resnum']), expected_resnums)
    assert_array_equal(sorted(ring['resname']),
                       ['PHE', 'PHE', 'TRP', 'TRP', 'TYR', 'TYR'])
Ejemplo n.º 12
0
def test_pi_stacking():
    """Check 10gs ring_dict data and pi_stacking counts against references."""

    def load_10gs(fmt, filename):
        # Read the first molecule from a file of the 10gs test complex.
        full_path = os.path.join(test_data_dir, 'data', 'pdbbind', '10gs',
                                 filename)
        return next(oddt.toolkit.readfile(fmt, full_path))

    lig = load_10gs('sdf', '10gs_ligand.sdf')
    rec = load_10gs('pdb', '10gs_pocket.pdb')
    rec.protein = True

    ring, _, strict_parallel, strict_perpendicular = pi_stacking(
        rec, lig, cutoff=7.5, tolerance=60)

    lig_centroids = [
        [5.4701666, 6.1994996, 30.8313350],
        [8.1811666, 2.5846664, 28.4028320],
    ]
    lig_vectors = [
        [0.471341, 0.023758, 0.857421],
        [0.772514, 0.518052, 1.354878],
    ]
    rec_centroids = [
        [5.6579995, 2.2964999, 23.4626674],
        [7.8634004, 7.7310004, 34.8283996],
        [9.8471670, 8.5676660, 34.9915008],
        [9.9951667, 3.7756664, 32.8191680],
        [10.055333, -1.4720000, 17.2121658],
        [14.519165, 1.8759999, 29.8346652],
        [16.490833, 16.873500, 27.9169998],
        [18.718666, 12.703166, 33.3141670],
        [25.716165, 4.9741668, 31.8198337],
    ]
    rec_vectors = [
        [0.197429, -0.044215, 0.937011],
        [0.271271, 0.082978, 1.345218],
        [0.335873, 0.659132, 1.307615],
        [0.531174, 0.010709, -0.066511],
        [0.535675, 0.512232, -0.519266],
        [0.788169, 0.233635, -0.698541],
        [1.097706, 0.017989, 1.071040],
        [1.147590, 0.122895, 0.798543],
        [1.347235, 0.516426, -0.461548],
    ]
    centroids_dist = [
        [8.3406204, 5.5546951],
        [4.9040379, 8.2385464],
        [6.4863953, 9.0544131],
        [5.5047319, 4.9206809],
        [16.2897951, 12.0498984],
        [10.0782127, 6.5362510],
        [15.6167449, 16.5365460],
        [14.9661240, 15.4124670],
        [20.3071175, 18.0239224],
    ]

    assert_array_almost_equal(distance(rec_centroids, lig_centroids),
                              centroids_dist)

    # (molecule, expected ring count, expected centroids, expected vectors);
    # the ligand is checked first, then the receptor.
    ring_expectations = (
        (lig, 2, lig_centroids, lig_vectors),
        (rec, 9, rec_centroids, rec_vectors),
    )
    for mol, ring_count, centroids, vectors in ring_expectations:
        assert len(mol.ring_dict) == ring_count
        assert_array_almost_equal(
            sorted(mol.ring_dict['centroid'].tolist()), centroids, decimal=5)
        assert_array_almost_equal(
            sorted(mol.ring_dict['vector'].tolist()), vectors, decimal=5)

    assert len(ring) == 6
    assert strict_parallel.sum() == 3
    assert strict_perpendicular.sum() == 0

    # Residue numbers must agree whether they are read from the ring records
    # or looked up in rec.res_dict through the ring 'resid' values.
    resids = sorted(ring['resid'])
    resnums = [7, 8, 8, 38, 38, 108]
    assert_array_equal(rec.res_dict[resids]['resnum'], resnums)
    assert_array_equal(sorted(ring['resnum']), resnums)
    assert_array_equal(sorted(ring['resname']),
                       ['PHE', 'PHE', 'TRP', 'TRP', 'TYR', 'TYR'])
Ejemplo n.º 13
0
def test_pi_stacking():
    """Verify pi_stacking and ring_dict results for the 10gs pocket/ligand."""
    # Reference tables, listed in sorted order.
    lig_centroids = [[5.4701666, 6.1994996, 30.8313350],
                     [8.1811666, 2.5846664, 28.4028320]]
    lig_vectors = [[0.471341, 0.023758, 0.857421],
                   [0.772514, 0.518052, 1.354878]]
    rec_centroids = [[5.6579995, 2.2964999, 23.4626674],
                     [7.8634004, 7.7310004, 34.8283996],
                     [9.8471670, 8.5676660, 34.9915008],
                     [9.9951667, 3.7756664, 32.8191680],
                     [10.055333, -1.4720000, 17.2121658],
                     [14.519165, 1.8759999, 29.8346652],
                     [16.490833, 16.873500, 27.9169998],
                     [18.718666, 12.703166, 33.3141670],
                     [25.716165, 4.9741668, 31.8198337]]
    rec_vectors = [[0.197429, -0.044215, 0.937011],
                   [0.271271, 0.082978, 1.345218],
                   [0.335873, 0.659132, 1.307615],
                   [0.531174, 0.010709, -0.066511],
                   [0.535675, 0.512232, -0.519266],
                   [0.788169, 0.233635, -0.698541],
                   [1.097706, 0.017989, 1.071040],
                   [1.147590, 0.122895, 0.798543],
                   [1.347235, 0.516426, -0.461548]]
    centroids_dist = [[8.3406204, 5.5546951],
                      [4.9040379, 8.2385464],
                      [6.4863953, 9.0544131],
                      [5.5047319, 4.9206809],
                      [16.2897951, 12.0498984],
                      [10.0782127, 6.5362510],
                      [15.6167449, 16.5365460],
                      [14.9661240, 15.4124670],
                      [20.3071175, 18.0239224]]
    ring_resnums = [7, 8, 8, 38, 38, 108]
    ring_resnames = ['PHE', 'PHE', 'TRP', 'TRP', 'TYR', 'TYR']

    ligand_file = os.path.join(test_data_dir, 'data', 'pdbbind', '10gs',
                               '10gs_ligand.sdf')
    pocket_file = os.path.join(test_data_dir, 'data', 'pdbbind', '10gs',
                               '10gs_pocket.pdb')
    lig = next(oddt.toolkit.readfile('sdf', ligand_file))
    rec = next(oddt.toolkit.readfile('pdb', pocket_file))
    rec.protein = True

    ring, _, strict_parallel, strict_perpendicular = pi_stacking(
        rec,
        lig,
        cutoff=7.5,
        tolerance=60,
    )

    computed_dist = distance(rec_centroids, lig_centroids)
    assert_array_almost_equal(computed_dist, centroids_dist)

    # Ligand rings.
    assert len(lig.ring_dict) == 2
    found = sorted(lig.ring_dict['centroid'].tolist())
    assert_array_almost_equal(found, lig_centroids, decimal=5)
    found = sorted(lig.ring_dict['vector'].tolist())
    assert_array_almost_equal(found, lig_vectors, decimal=5)

    # Receptor rings.
    assert len(rec.ring_dict) == 9
    found = sorted(rec.ring_dict['centroid'].tolist())
    assert_array_almost_equal(found, rec_centroids, decimal=5)
    found = sorted(rec.ring_dict['vector'].tolist())
    assert_array_almost_equal(found, rec_vectors, decimal=5)

    # Stacking results.
    assert len(ring) == 6
    assert strict_parallel.sum() == 3
    assert strict_perpendicular.sum() == 0

    # Cross-check residue indexing: the ring 'resid' values are used to index
    # rec.res_dict and must lead to the same resnum values as the ring records.
    resids = sorted(ring['resid'])
    assert_array_equal(rec.res_dict[resids]['resnum'], ring_resnums)
    assert_array_equal(sorted(ring['resnum']), ring_resnums)
    assert_array_equal(sorted(ring['resname']), ring_resnames)