Example #1
0
def for_lysozyme():
    """
    Plot one hierarchical-clustering dendrogram of the lysozyme set per
    method file, using a variant of the matrix chosen on the command line.

    The variant is selected by the first command-line argument
    (``sys.argv[1]``):

    * ``"upper"``   -- keep only the upper triangle;
    * ``"lower"``   -- keep only the lower triangle, transposed onto the
      upper one;
    * ``"average"`` -- element-wise mean of the two triangular forms above;
    * anything else -- the matrix is used unchanged.

    Every file except ``RMSD.csv`` is inverted element-wise (``1 / mat``)
    before the variant is applied.  Each figure is saved as
    ``img/for_lysozyme_<method>/<file stem>.png``; the directory is created
    if it does not exist yet.

    Raises:
        IndexError: if no command-line argument was supplied.
    """

    import os
    import sys

    method = sys.argv[1]

    paths = [
        "CEpiMatch.csv",
        "Multiprot.csv",
        "SPa.csv",
        "SPb.csv",
        "SPe.csv",
        "TMa.csv",
        "TMb.csv",
        "TMc.csv",
        "MATT.csv",
        "RMSD.csv",
    ]

    # savefig does not create missing directories, so make sure it exists.
    out_dir = "img/for_lysozyme_%s" % method
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    for path in paths:
        mats = categorize(load_mat("data/%s" % path), data)

        mat = mats["lysozyme"]

        # Grab the row labels now: the numpy calls below may hand back a
        # plain ndarray that no longer carries the ``rlabels`` attribute.
        labels = mat.rlabels

        # NOTE(review): presumably every file but RMSD holds similarity
        # scores, hence the inversion into a distance-like value -- confirm.
        if path not in ("RMSD.csv",):
            mat = 1 / mat

        if method == "upper":
            mat = np.triu(mat)
        elif method == "lower":
            mat = np.transpose(np.tril(mat))
        elif method == "average":
            # Mean of the "upper" and "lower" variants.  The lower triangle
            # has to be transposed first; adding triu and tril directly only
            # yields the original matrix with a doubled diagonal.
            mat = (np.triu(mat) + np.transpose(np.tril(mat))) / 2

        # NOTE(review): if ``linkage`` is scipy's, a 2-D input is treated as
        # observation vectors, not as a distance matrix -- confirm intent.
        Z = linkage(mat)

        fig = plt.figure()
        try:
            dendrogram(Z, labels=labels, orientation="right")

            plt.title("Hierarchical clustering for lysozyme")
            plt.ylabel("PDB ID")
            plt.savefig("%s/%s.png" % (out_dir, path.split(".")[0]))
        finally:
            # Release the figure so they do not pile up across iterations.
            plt.close(fig)
Example #2
0
def compare():
    """
    Plot a series of hierarchical-clustering dendrograms, one for every
    combination of method file and category returned by ``categorize``.

    Every file except ``RMSD.csv`` is inverted element-wise (``1 / mat``)
    before clustering.  Each figure is saved as
    ``img/<category name>/<file stem>.png``; the directory is created if it
    does not exist yet.
    """

    import os

    paths = [
        "CEpiMatch.csv",
        "Multiprot.csv",
        "SPa.csv",
        "SPb.csv",
        "SPe.csv",
        "TMa.csv",
        "TMb.csv",
        "TMc.csv",
        "MATT.csv",
        "RMSD.csv",
    ]

    for path in paths:
        mats = categorize(load_mat("data/%s" % path), data)

        type_name = path.split(".")[0]

        for name, mat in mats.items():
            # Read the row labels before any arithmetic, so they survive even
            # if the result of ``1 / mat`` does not carry ``rlabels``.
            labels = mat.rlabels

            # NOTE(review): presumably every file but RMSD holds similarity
            # scores, hence the inversion -- confirm.
            if path not in ("RMSD.csv",):
                mat = 1 / mat

            # NOTE(review): if ``linkage`` is scipy's, a 2-D input is treated
            # as observation vectors, not a distance matrix -- confirm intent.
            Z = linkage(mat)

            # savefig does not create missing directories.
            out_dir = "img/%s" % name
            if not os.path.isdir(out_dir):
                os.makedirs(out_dir)

            fig = plt.figure()
            try:
                dendrogram(Z, labels=labels, orientation="right")

                plt.title("%s  %s" % (name, type_name))
                plt.ylabel("PDB ID")
                plt.savefig("%s/%s.png" % (out_dir, type_name))
            finally:
                # One figure is opened per (file, category) pair; close it so
                # they do not accumulate in memory.
                plt.close(fig)