def for_lysozyme():
    """Plot dendrograms of the "lysozyme" matrix for every scoring method.

    The triangle-selection mode is read from the first command-line
    argument (``sys.argv[1]``):

    * ``"upper"``   -- keep only the upper triangle of the matrix,
    * ``"lower"``   -- keep only the lower triangle, transposed,
    * ``"average"`` -- ``(tril + triu) / 2`` (see the review note below),
    * anything else -- use the matrix unchanged.

    For each CSV in ``data/`` the "lysozyme" entry returned by
    ``categorize`` is clustered and the dendrogram is written to
    ``img/for_lysozyme_<method>/<csv stem>.png``.  The output directory is
    created when missing, and every figure is closed once saved so that
    figures do not pile up over the loop.

    Raises:
        IndexError: if no command-line argument was supplied.
    """
    import os
    import sys

    method = sys.argv[1]
    paths = [
        "CEpiMatch.csv",
        "Multiprot.csv",
        "SPa.csv",
        "SPb.csv",
        "SPe.csv",
        "TMa.csv",
        "TMb.csv",
        "TMc.csv",
        "MATT.csv",
        "RMSD.csv",
    ]

    out_dir = "img/for_lysozyme_%s" % method
    # savefig does not create missing directories on its own.
    os.makedirs(out_dir, exist_ok=True)

    for path in paths:
        mats = categorize(load_mat("data/%s" % path), data)
        mat = mats["lysozyme"]
        # Grab the labels before any arithmetic / NumPy call, which may
        # return an object that no longer carries the `rlabels` attribute.
        labels = mat.rlabels

        # Every matrix except RMSD is replaced by its element-wise
        # reciprocal -- presumably those files hold similarity scores
        # rather than distances; TODO confirm.
        if path != "RMSD.csv":
            mat = 1 / mat

        if method == "upper":
            mat = np.triu(mat)
        elif method == "lower":
            mat = np.transpose(np.tril(mat))
        elif method == "average":
            # NOTE(review): unlike the "lower" branch, the lower triangle is
            # not transposed here, so this equals (mat + diag(mat)) / 2 and
            # not the mean of the two triangles -- confirm this is intended.
            mat = (np.tril(mat) + np.triu(mat)) / 2

        fig = plt.figure()
        try:
            # NOTE(review): if `linkage` is SciPy's, a 2-D argument is read
            # as observation vectors, not as a square distance matrix --
            # confirm.
            Z = linkage(mat)
            dendrogram(Z, labels=labels, orientation="right")
            plt.title("Hierarchical clustering for lysozyme")
            plt.ylabel("PDB ID")
            plt.savefig("%s/%s.png" % (out_dir, path.split(".")[0]))
        finally:
            # Release the figure even when clustering or saving fails.
            plt.close(fig)
def compare():
    """Plot one dendrogram per (category, scoring method) pair.

    For each CSV in ``data/`` the loaded matrix is split by ``categorize``
    into named sub-matrices.  Each sub-matrix is clustered and its
    dendrogram is saved to ``img/<category name>/<csv stem>.png``.  The
    output directory is created when missing, and every figure is closed
    once saved so that figures do not pile up over the nested loop.
    """
    import os

    paths = [
        "CEpiMatch.csv",
        "Multiprot.csv",
        "SPa.csv",
        "SPb.csv",
        "SPe.csv",
        "TMa.csv",
        "TMb.csv",
        "TMc.csv",
        "MATT.csv",
        "RMSD.csv",
    ]

    for path in paths:
        type_name = path.split(".")[0]
        mats = categorize(load_mat("data/%s" % path), data)
        for name, mat in mats.items():
            # Grab the labels before the division, which may return an
            # object that no longer carries the `rlabels` attribute.
            labels = mat.rlabels

            # Every matrix except RMSD is replaced by its element-wise
            # reciprocal -- presumably those files hold similarity scores
            # rather than distances; TODO confirm.
            if path != "RMSD.csv":
                mat = 1 / mat

            out_dir = "img/%s" % name
            # savefig does not create missing directories on its own.
            os.makedirs(out_dir, exist_ok=True)

            fig = plt.figure()
            try:
                # NOTE(review): if `linkage` is SciPy's, a 2-D argument is
                # read as observation vectors, not as a square distance
                # matrix -- confirm.
                Z = linkage(mat)
                dendrogram(Z, labels=labels, orientation="right")
                plt.title("%s %s" % (name, type_name))
                plt.ylabel("PDB ID")
                plt.savefig("%s/%s.png" % (out_dir, type_name))
            finally:
                # Release the figure even when clustering or saving fails.
                plt.close(fig)