コード例 #1
0
ファイル: analysis.py プロジェクト: dhingra-s/protlego
def calc_dist_matrix(chimera: Chimera = None, filename: str = None, selection: str = 'residue', type='contacts',
                     plot=False):
    """
    Return the residue-residue distance matrix (or contact map) of a structure.

    Exactly one of ``chimera`` or ``filename`` must be provided.

    :param chimera: A Chimera object with n residues.
    :param filename: path to a pdb file; it is loaded into a Chimera when given.
    :param selection: How to compute the distance between two residues. 'residue' (the closest two
        atoms between the two residues) or 'alpha' (distance of the alpha carbons).
    :param type: 'contacts' (boolean contact map, True where the distance is below 8 Angstroms)
        or 'distances' (the raw distances).
    :param plot: whether to plot the matrix. Default is False
    :return: matrix. np.array. An n by n distance matrix (boolean when type is 'contacts').
    :raises ValueError: if both or neither of ``chimera``/``filename`` are given, or if
        ``selection`` or ``type`` is not one of the supported values.
    """
    if chimera and filename:
        raise ValueError("Only a Chimera object or the path to a pdb file must be specified")
    if not chimera and not filename:
        raise ValueError("At least a Chimera object or the path to a pdb file must be specified")

    # Validate the options up front so that a typo fails before the structure
    # is loaded and projected instead of after the expensive work.
    if selection not in ('residue', 'alpha'):
        raise ValueError("Specify a selection type: 'residue' or 'alpha'")
    if type not in ('contacts', 'distances'):
        raise ValueError("Please select contact type between 'contacts' or distances")

    if filename:
        chimera = Chimera(filename=filename)

    if selection == 'residue':
        metr = MetricSelfDistance("protein", groupsel="residue", metric="distances", pbc=False)
    else:
        metr = MetricSelfDistance("protein and name CA", metric="distances", pbc=False)
    mapping = metr.getMapping(chimera)
    projection = metr.project(chimera)
    # NOTE(review): the projection presumably has one row per frame, so the
    # first row is the distance vector of the structure; a 1-D projection is
    # used as is. Previously only the 'residue' branch took the first row --
    # confirm against the MetricSelfDistance documentation.
    vector = projection[0] if projection.ndim > 1 else projection
    matrix, _, _ = contactVecToMatrix(vector, mapping.atomIndexes)

    if type == "contacts":
        matrix = matrix < 8

    if plot:
        fig = plt.figure(figsize=(12, 12))
        ax = fig.add_subplot(111)
        # Colormap names are passed directly to imshow, which avoids the
        # deprecated matplotlib.cm.get_cmap lookup.
        cmap = 'gist_rainbow' if type == 'distances' else 'binary'
        image = ax.imshow(matrix, cmap=cmap, interpolation='nearest', origin="lower")
        if type == 'distances':
            # Only the continuous distance map needs a colour scale.
            fig.colorbar(image)
        plt.xlabel("Residue index", fontsize=24)
        plt.ylabel("Residue index", fontsize=24)
        plt.xticks(fontsize=22)
        plt.yticks(fontsize=22)

    return matrix
コード例 #2
0
ファイル: analysis.py プロジェクト: dhingra-s/protlego
def calc_contact_order(chimera: Chimera = None, filename: str = None, diss_cutoff: int = 8):
    """
    Compute the contact order of a protein, as a percentage.

    The contact order of a protein is a measure of the locality of the inter-amino acid contacts in the
    native folded state. It is computed as the average sequence distance between residues that form contacts
    below a threshold in the folded protein divided by the total length of the protein.

    Exactly one of ``chimera`` or ``filename`` must be provided. The function calls
    ``renumberResidues()`` on the Chimera before measuring and prints the result.

    :param chimera: A Chimera object with n residues.
    :param filename: path to a pdb file; it is loaded into a Chimera when given.
    :param diss_cutoff: The maximum distance in Angstroms between two residues to be in contact, default 8
    :return: the contact order (%); 0.0 when no contacts are found
    :raises ValueError: if both or neither of ``chimera``/``filename`` are given.
    """
    if chimera and filename:
        raise ValueError("Only a Chimera object or the path to a pdb file must be specified")
    if not chimera and not filename:
        raise ValueError("At least a Chimera object or the path to a pdb file must be specified")
    if filename:
        chimera = Chimera(filename=filename)
    chimera.renumberResidues()
    metr = MetricSelfDistance("protein and noh", groupsel="residue", metric="contacts", threshold=diss_cutoff,
                              pbc=False)
    a = metr.project(chimera)
    mapping = metr.getMapping(chimera)
    matrix, _, _ = contactVecToMatrix(a[0], mapping.atomIndexes)
    # Keep only the upper triangle so each residue pair is counted once
    # (assumes the contact matrix is symmetric -- TODO confirm).
    idx1, idx2 = np.where(np.triu(matrix))
    total_contacts = len(idx1)
    total_residues = chimera.numResidues
    if total_contacts == 0 or total_residues == 0:
        # Without contacts the average is undefined; report 0 rather than
        # failing with ZeroDivisionError.
        co = 0.0
    else:
        summation = np.sum(idx2 - idx1)
        co = 1 / (total_contacts * total_residues) * summation
    print(f"Contact order is {co*100} %")
    return co * 100